treat 1.2.0 → 2.0.0rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -2
- data/README.md +12 -21
- data/lib/treat/autoload.rb +44 -0
- data/lib/treat/config/config.rb +38 -0
- data/lib/treat/config/configurable.rb +51 -0
- data/lib/treat/config/data/config.rb +50 -0
- data/lib/treat/config/data/core.rb +52 -0
- data/lib/treat/config/data/databases.rb +10 -0
- data/lib/treat/config/data/entities.rb +15 -0
- data/lib/treat/config/data/languages/agnostic.rb +31 -0
- data/lib/treat/config/{languages → data/languages}/arabic.rb +0 -0
- data/lib/treat/config/{languages → data/languages}/chinese.rb +0 -0
- data/lib/treat/config/{languages → data/languages}/dutch.rb +1 -1
- data/lib/treat/config/data/languages/english.rb +95 -0
- data/lib/treat/config/data/languages/french.rb +148 -0
- data/lib/treat/config/data/languages/german.rb +135 -0
- data/lib/treat/config/{languages → data/languages}/greek.rb +1 -1
- data/lib/treat/config/data/languages/italian.rb +162 -0
- data/lib/treat/config/data/languages/polish.rb +11 -0
- data/lib/treat/config/{languages → data/languages}/portuguese.rb +1 -1
- data/lib/treat/config/{languages → data/languages}/russian.rb +1 -1
- data/lib/treat/config/data/languages/spanish.rb +291 -0
- data/lib/treat/config/data/languages/swedish.rb +289 -0
- data/lib/treat/config/data/libraries.rb +12 -0
- data/lib/treat/config/data/linguistics.rb +44 -0
- data/lib/treat/config/data/tags.rb +328 -0
- data/lib/treat/config/{workers → data/workers}/extractors.rb +2 -10
- data/lib/treat/config/{workers → data/workers}/formatters.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/inflectors.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/learners.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/lexicalizers.rb +4 -3
- data/lib/treat/config/{workers → data/workers}/processors.rb +3 -3
- data/lib/treat/config/{workers → data/workers}/retrievers.rb +0 -0
- data/lib/treat/config/importable.rb +31 -0
- data/lib/treat/config/paths.rb +23 -0
- data/lib/treat/config/tags.rb +37 -0
- data/lib/treat/core/dsl.rb +55 -0
- data/lib/treat/{installer.rb → core/installer.rb} +10 -12
- data/lib/treat/core/server.rb +40 -0
- data/lib/treat/entities/entities.rb +101 -0
- data/lib/treat/entities/{abilities/doable.rb → entity/applicable.rb} +5 -3
- data/lib/treat/entities/{abilities → entity}/buildable.rb +118 -63
- data/lib/treat/entities/{abilities → entity}/checkable.rb +2 -2
- data/lib/treat/entities/{abilities → entity}/comparable.rb +6 -6
- data/lib/treat/entities/{abilities → entity}/countable.rb +2 -1
- data/lib/treat/entities/entity/debuggable.rb +86 -0
- data/lib/treat/entities/{abilities → entity}/delegatable.rb +16 -26
- data/lib/treat/entities/{abilities → entity}/exportable.rb +2 -2
- data/lib/treat/entities/{abilities → entity}/iterable.rb +4 -16
- data/lib/treat/entities/{abilities → entity}/magical.rb +22 -17
- data/lib/treat/entities/entity/registrable.rb +36 -0
- data/lib/treat/entities/{abilities → entity}/stringable.rb +18 -15
- data/lib/treat/entities/entity.rb +86 -77
- data/lib/treat/exception.rb +3 -0
- data/lib/treat/helpers/hash.rb +29 -0
- data/lib/treat/helpers/help.rb +35 -0
- data/lib/treat/helpers/object.rb +55 -0
- data/lib/treat/helpers/string.rb +124 -0
- data/lib/treat/{core → learning}/data_set.rb +11 -11
- data/lib/treat/{core → learning}/export.rb +3 -3
- data/lib/treat/{core → learning}/problem.rb +26 -16
- data/lib/treat/{core → learning}/question.rb +5 -9
- data/lib/treat/loaders/linguistics.rb +8 -9
- data/lib/treat/loaders/stanford.rb +5 -11
- data/lib/treat/modules.rb +33 -0
- data/lib/treat/proxies/array.rb +27 -0
- data/lib/treat/proxies/language.rb +47 -0
- data/lib/treat/proxies/number.rb +18 -0
- data/lib/treat/proxies/proxy.rb +25 -0
- data/lib/treat/proxies/string.rb +18 -0
- data/lib/treat/version.rb +10 -1
- data/lib/treat/{workers.rb → workers/categorizable.rb} +18 -19
- data/lib/treat/workers/extractors/keywords/tf_idf.rb +11 -11
- data/lib/treat/workers/extractors/language/what_language.rb +8 -6
- data/lib/treat/workers/extractors/name_tag/stanford.rb +10 -4
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +36 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +27 -0
- data/lib/treat/workers/extractors/tf_idf/native.rb +4 -4
- data/lib/treat/workers/extractors/time/chronic.rb +2 -4
- data/lib/treat/workers/extractors/time/nickel.rb +19 -20
- data/lib/treat/workers/extractors/time/ruby.rb +2 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +12 -12
- data/lib/treat/workers/extractors/topics/reuters.rb +9 -13
- data/lib/treat/workers/formatters/readers/autoselect.rb +1 -1
- data/lib/treat/workers/formatters/readers/image.rb +19 -9
- data/lib/treat/workers/formatters/readers/odt.rb +2 -1
- data/lib/treat/workers/formatters/readers/pdf.rb +20 -3
- data/lib/treat/workers/formatters/readers/xml.rb +0 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +10 -20
- data/lib/treat/workers/formatters/serializers/xml.rb +17 -26
- data/lib/treat/workers/formatters/serializers/yaml.rb +5 -4
- data/lib/treat/workers/formatters/unserializers/mongo.rb +4 -4
- data/lib/treat/workers/formatters/unserializers/xml.rb +3 -4
- data/lib/treat/workers/formatters/unserializers/yaml.rb +3 -4
- data/lib/treat/workers/formatters/visualizers/dot.rb +1 -0
- data/lib/treat/workers/formatters/visualizers/standoff.rb +2 -3
- data/lib/treat/workers/formatters/visualizers/tree.rb +2 -3
- data/lib/treat/workers/{group.rb → groupable.rb} +9 -9
- data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +1 -3
- data/lib/treat/workers/inflectors/conjugators/linguistics.rb +5 -7
- data/lib/treat/workers/inflectors/declensors/english.rb +13 -20
- data/lib/treat/workers/inflectors/declensors/linguistics.rb +29 -28
- data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +0 -2
- data/lib/treat/workers/inflectors/stemmers/porter.rb +8 -10
- data/lib/treat/workers/inflectors/stemmers/porter_c.rb +7 -7
- data/lib/treat/workers/inflectors/stemmers/uea.rb +3 -8
- data/lib/treat/workers/learners/classifiers/id3.rb +17 -14
- data/lib/treat/workers/learners/classifiers/linear.rb +15 -27
- data/lib/treat/workers/learners/classifiers/mlp.rb +32 -19
- data/lib/treat/workers/learners/classifiers/svm.rb +28 -21
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +19 -3
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +15 -7
- data/lib/treat/workers/lexicalizers/taggers/brill/patch.rb +4 -1
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +8 -19
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +4 -15
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +22 -13
- data/lib/treat/workers/processors/chunkers/autoselect.rb +2 -3
- data/lib/treat/workers/processors/chunkers/html.rb +1 -6
- data/lib/treat/workers/processors/parsers/enju.rb +2 -4
- data/lib/treat/workers/processors/parsers/stanford.rb +13 -7
- data/lib/treat/workers/processors/segmenters/punkt.rb +25 -11
- data/lib/treat/workers/processors/segmenters/scalpel.rb +20 -0
- data/lib/treat/workers/processors/segmenters/srx.rb +42 -0
- data/lib/treat/workers/processors/segmenters/stanford.rb +5 -5
- data/lib/treat/workers/processors/segmenters/tactful.rb +21 -11
- data/lib/treat/workers/processors/tokenizers/ptb.rb +40 -30
- data/lib/treat/workers/processors/tokenizers/punkt.rb +14 -19
- data/lib/treat/workers/processors/tokenizers/stanford.rb +38 -22
- data/lib/treat/workers/retrievers/indexers/ferret.rb +6 -3
- data/lib/treat/workers/retrievers/searchers/ferret.rb +2 -2
- data/lib/treat/workers/workers.rb +6 -0
- data/lib/treat.rb +18 -32
- data/models/MANIFEST +1 -0
- data/spec/core/data_set.rb +174 -0
- data/spec/core/export.rb +52 -0
- data/spec/core/problem.rb +144 -0
- data/spec/core/question.rb +52 -0
- data/spec/{collection.rb → entities/collection.rb} +20 -35
- data/spec/{document.rb → entities/document.rb} +3 -54
- data/spec/{entity.rb → entities/entity.rb} +10 -9
- data/spec/entities/phrase.rb +33 -0
- data/spec/{token.rb → entities/token.rb} +0 -57
- data/spec/entities/word.rb +3 -0
- data/spec/{zone.rb → entities/zone.rb} +0 -26
- data/spec/helper.rb +116 -32
- data/spec/sandbox.rb +258 -25
- data/spec/treat.rb +26 -34
- data/spec/workers/agnostic.rb +137 -0
- data/spec/workers/english.rb +194 -0
- data/spec/workers/examples/english/economist/hungarys_troubles.txt +46 -0
- data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/archimedes.abw +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/euler.html +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/gauss.pdf +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/leibniz.txt +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/newton.doc +0 -0
- data/spec/workers/examples/english/phrase.xml +5 -0
- data/spec/workers/examples/english/test.txt +1 -0
- data/spec/workers/language.rb +280 -0
- data/spec/workers.rb +28 -0
- metadata +122 -105
- data/lib/treat/config/core/acronyms.rb +0 -5
- data/lib/treat/config/core/encodings.rb +0 -8
- data/lib/treat/config/core/entities.rb +0 -2
- data/lib/treat/config/core/language.rb +0 -3
- data/lib/treat/config/core/paths.rb +0 -8
- data/lib/treat/config/core/syntax.rb +0 -1
- data/lib/treat/config/core/verbosity.rb +0 -1
- data/lib/treat/config/databases/default.rb +0 -1
- data/lib/treat/config/databases/mongo.rb +0 -1
- data/lib/treat/config/languages/agnostic.rb +0 -34
- data/lib/treat/config/languages/english.rb +0 -60
- data/lib/treat/config/languages/french.rb +0 -18
- data/lib/treat/config/languages/german.rb +0 -18
- data/lib/treat/config/languages/italian.rb +0 -12
- data/lib/treat/config/languages/polish.rb +0 -12
- data/lib/treat/config/languages/spanish.rb +0 -12
- data/lib/treat/config/languages/swedish.rb +0 -12
- data/lib/treat/config/libraries/punkt.rb +0 -1
- data/lib/treat/config/libraries/reuters.rb +0 -1
- data/lib/treat/config/libraries/stanford.rb +0 -1
- data/lib/treat/config/linguistics/categories.rb +0 -4
- data/lib/treat/config/linguistics/punctuation.rb +0 -33
- data/lib/treat/config/tags/aligned.rb +0 -221
- data/lib/treat/config/tags/enju.rb +0 -71
- data/lib/treat/config/tags/paris7.rb +0 -17
- data/lib/treat/config/tags/ptb.rb +0 -15
- data/lib/treat/config/workers/list.rb +0 -1
- data/lib/treat/config.rb +0 -135
- data/lib/treat/core.rb +0 -5
- data/lib/treat/entities/abilities/copyable.rb +0 -47
- data/lib/treat/entities/abilities/debuggable.rb +0 -83
- data/lib/treat/entities/abilities/registrable.rb +0 -46
- data/lib/treat/entities/collection.rb +0 -40
- data/lib/treat/entities/document.rb +0 -10
- data/lib/treat/entities/group.rb +0 -18
- data/lib/treat/entities/section.rb +0 -13
- data/lib/treat/entities/token.rb +0 -47
- data/lib/treat/entities/zone.rb +0 -12
- data/lib/treat/entities.rb +0 -6
- data/lib/treat/helpers/didyoumean.rb +0 -57
- data/lib/treat/helpers/escaping.rb +0 -15
- data/lib/treat/helpers/formatting.rb +0 -41
- data/lib/treat/helpers/objtohash.rb +0 -8
- data/lib/treat/helpers/platform.rb +0 -15
- data/lib/treat/helpers/reflection.rb +0 -17
- data/lib/treat/helpers/temporary.rb +0 -27
- data/lib/treat/helpers/verbosity.rb +0 -19
- data/lib/treat/helpers.rb +0 -5
- data/lib/treat/loaders.rb +0 -10
- data/lib/treat/proxies.rb +0 -106
- data/lib/treat/workers/formatters/unserializers/autoselect.rb +0 -17
- data/lib/treat/workers/inflectors/declensors/active_support.rb +0 -31
- data/lib/treat/workers/processors/tokenizers/tactful.rb +0 -68
- data/spec/core.rb +0 -441
- data/spec/phrase.rb +0 -112
- data/spec/word.rb +0 -111
@@ -0,0 +1,135 @@
|
|
1
|
+
{
|
2
|
+
dependencies: [
|
3
|
+
'punkt-segmenter',
|
4
|
+
'tactful_tokenizer',
|
5
|
+
'stanford-core-nlp'
|
6
|
+
],
|
7
|
+
workers: {
|
8
|
+
processors: {
|
9
|
+
segmenters: [:tactful, :punkt, :stanford, :scalpel],
|
10
|
+
tokenizers: [:stanford, :punkt],
|
11
|
+
parsers: [:stanford]
|
12
|
+
},
|
13
|
+
lexicalizers: {
|
14
|
+
taggers: [:stanford],
|
15
|
+
categorizers: [:from_tag]
|
16
|
+
}
|
17
|
+
},
|
18
|
+
stop_words:
|
19
|
+
[
|
20
|
+
"alle",
|
21
|
+
"allem",
|
22
|
+
"alles",
|
23
|
+
"andere",
|
24
|
+
"anderem",
|
25
|
+
"anderen",
|
26
|
+
"anderer",
|
27
|
+
"anderes",
|
28
|
+
"auf",
|
29
|
+
"bei",
|
30
|
+
"beim",
|
31
|
+
"bist",
|
32
|
+
"dadurch",
|
33
|
+
"dein",
|
34
|
+
"deine",
|
35
|
+
"deiner",
|
36
|
+
"deines",
|
37
|
+
"deins",
|
38
|
+
"dem",
|
39
|
+
"denen",
|
40
|
+
"der",
|
41
|
+
"deren",
|
42
|
+
"des",
|
43
|
+
"deshalb",
|
44
|
+
"dessen",
|
45
|
+
"diese",
|
46
|
+
"diesem",
|
47
|
+
"diesen",
|
48
|
+
"dieser",
|
49
|
+
"dieses",
|
50
|
+
"ein",
|
51
|
+
"eine",
|
52
|
+
"einem",
|
53
|
+
"einen",
|
54
|
+
"einer",
|
55
|
+
"eines",
|
56
|
+
"euer",
|
57
|
+
"euere",
|
58
|
+
"eueren",
|
59
|
+
"eueres",
|
60
|
+
"für",
|
61
|
+
"haben",
|
62
|
+
"habt",
|
63
|
+
"hatte",
|
64
|
+
"hatten",
|
65
|
+
"hattest",
|
66
|
+
"hattet",
|
67
|
+
"hierzu",
|
68
|
+
"hinter",
|
69
|
+
"ich",
|
70
|
+
"ihr",
|
71
|
+
"ihre",
|
72
|
+
"ihren",
|
73
|
+
"ihrer",
|
74
|
+
"ihres",
|
75
|
+
"indem",
|
76
|
+
"ist",
|
77
|
+
"jede",
|
78
|
+
"jedem",
|
79
|
+
"jeden",
|
80
|
+
"jeder",
|
81
|
+
"jedes",
|
82
|
+
"kann",
|
83
|
+
"kannst",
|
84
|
+
"können",
|
85
|
+
"könnt",
|
86
|
+
"konnte",
|
87
|
+
"konnten",
|
88
|
+
"konntest",
|
89
|
+
"konntet",
|
90
|
+
"mehr",
|
91
|
+
"mein",
|
92
|
+
"meine",
|
93
|
+
"meiner",
|
94
|
+
"meines",
|
95
|
+
"meins",
|
96
|
+
"nach",
|
97
|
+
"neben",
|
98
|
+
"nicht",
|
99
|
+
"nichts",
|
100
|
+
"seid",
|
101
|
+
"sein",
|
102
|
+
"seine",
|
103
|
+
"seiner",
|
104
|
+
"seines",
|
105
|
+
"seins",
|
106
|
+
"sie",
|
107
|
+
"sind",
|
108
|
+
"über",
|
109
|
+
"und",
|
110
|
+
"uns",
|
111
|
+
"unser",
|
112
|
+
"unsere",
|
113
|
+
"unter",
|
114
|
+
"vor",
|
115
|
+
"warst",
|
116
|
+
"weil",
|
117
|
+
"wenn",
|
118
|
+
"werde",
|
119
|
+
"werden",
|
120
|
+
"werdet",
|
121
|
+
"willst",
|
122
|
+
"wir",
|
123
|
+
"wird",
|
124
|
+
"wirst",
|
125
|
+
"wollen",
|
126
|
+
"wollt",
|
127
|
+
"wollte",
|
128
|
+
"wollten",
|
129
|
+
"wolltest",
|
130
|
+
"wolltet",
|
131
|
+
"zum",
|
132
|
+
"zur"
|
133
|
+
]
|
134
|
+
}
|
135
|
+
|
@@ -0,0 +1,162 @@
|
|
1
|
+
{
|
2
|
+
dependencies: [
|
3
|
+
'punkt-segmenter',
|
4
|
+
'tactful_tokenizer'
|
5
|
+
],
|
6
|
+
workers: {
|
7
|
+
processors: {
|
8
|
+
segmenters: [:punkt],
|
9
|
+
tokenizers: []
|
10
|
+
}
|
11
|
+
},
|
12
|
+
stop_words:
|
13
|
+
[
|
14
|
+
"affinche",
|
15
|
+
"alcun",
|
16
|
+
"alcuna",
|
17
|
+
"alcune",
|
18
|
+
"alcuni",
|
19
|
+
"alcuno",
|
20
|
+
"allora",
|
21
|
+
"altra",
|
22
|
+
"altre",
|
23
|
+
"altri",
|
24
|
+
"altro",
|
25
|
+
"anziche",
|
26
|
+
"certa",
|
27
|
+
"certe",
|
28
|
+
"certi",
|
29
|
+
"certo",
|
30
|
+
"che",
|
31
|
+
"chi",
|
32
|
+
"chiunque",
|
33
|
+
"comunque",
|
34
|
+
"con",
|
35
|
+
"cosa",
|
36
|
+
"cose",
|
37
|
+
"cui",
|
38
|
+
"dagli",
|
39
|
+
"dai",
|
40
|
+
"dall",
|
41
|
+
"dalla",
|
42
|
+
"dalle",
|
43
|
+
"darsi",
|
44
|
+
"degli",
|
45
|
+
"del",
|
46
|
+
"dell",
|
47
|
+
"della",
|
48
|
+
"delle",
|
49
|
+
"dello",
|
50
|
+
"dunque",
|
51
|
+
"egli",
|
52
|
+
"eppure",
|
53
|
+
"esse",
|
54
|
+
"essi",
|
55
|
+
"forse",
|
56
|
+
"gia",
|
57
|
+
"infatti",
|
58
|
+
"inoltre",
|
59
|
+
"invece",
|
60
|
+
"lui",
|
61
|
+
"malgrado",
|
62
|
+
"mediante",
|
63
|
+
"meno",
|
64
|
+
"mentre",
|
65
|
+
"mie",
|
66
|
+
"miei",
|
67
|
+
"mio",
|
68
|
+
"modo",
|
69
|
+
"molta",
|
70
|
+
"molte",
|
71
|
+
"molti",
|
72
|
+
"molto",
|
73
|
+
"negli",
|
74
|
+
"nel",
|
75
|
+
"nella",
|
76
|
+
"nelle",
|
77
|
+
"nessun",
|
78
|
+
"nessuna",
|
79
|
+
"nessuno",
|
80
|
+
"niente",
|
81
|
+
"noi",
|
82
|
+
"nostra",
|
83
|
+
"nostre",
|
84
|
+
"nostri",
|
85
|
+
"nostro",
|
86
|
+
"nulla",
|
87
|
+
"occorre",
|
88
|
+
"ogni",
|
89
|
+
"ognuno",
|
90
|
+
"oltre",
|
91
|
+
"oltretutto",
|
92
|
+
"oppure",
|
93
|
+
"ovunque",
|
94
|
+
"ovvio",
|
95
|
+
"percio",
|
96
|
+
"pertanto",
|
97
|
+
"piu",
|
98
|
+
"piuttosto",
|
99
|
+
"poca",
|
100
|
+
"poco",
|
101
|
+
"poiche",
|
102
|
+
"propri",
|
103
|
+
"proprie",
|
104
|
+
"proprio",
|
105
|
+
"puo",
|
106
|
+
"qua",
|
107
|
+
"qual",
|
108
|
+
"qualche",
|
109
|
+
"qualcuna",
|
110
|
+
"qualcuno",
|
111
|
+
"quale",
|
112
|
+
"quali",
|
113
|
+
"qualunque",
|
114
|
+
"quando",
|
115
|
+
"quant",
|
116
|
+
"quante",
|
117
|
+
"quanti",
|
118
|
+
"quanto",
|
119
|
+
"quantunque",
|
120
|
+
"quegli",
|
121
|
+
"quei",
|
122
|
+
"quest",
|
123
|
+
"questa",
|
124
|
+
"queste",
|
125
|
+
"questi",
|
126
|
+
"questo",
|
127
|
+
"qui",
|
128
|
+
"quindi",
|
129
|
+
"sebbene",
|
130
|
+
"sembra",
|
131
|
+
"sempre",
|
132
|
+
"senza",
|
133
|
+
"soltanto",
|
134
|
+
"stessa",
|
135
|
+
"stesse",
|
136
|
+
"stessi",
|
137
|
+
"stesso",
|
138
|
+
"sugli",
|
139
|
+
"sui",
|
140
|
+
"sul",
|
141
|
+
"sull",
|
142
|
+
"sulla",
|
143
|
+
"sulle",
|
144
|
+
"suo",
|
145
|
+
"suoi",
|
146
|
+
"taluni",
|
147
|
+
"taluno",
|
148
|
+
"tanta",
|
149
|
+
"tanti",
|
150
|
+
"tanto",
|
151
|
+
"tra",
|
152
|
+
"tuo",
|
153
|
+
"tuoi",
|
154
|
+
"tutt",
|
155
|
+
"tutta",
|
156
|
+
"tutte",
|
157
|
+
"tutto",
|
158
|
+
"una",
|
159
|
+
"uno",
|
160
|
+
"voi"
|
161
|
+
]
|
162
|
+
}
|
@@ -0,0 +1,291 @@
|
|
1
|
+
{
|
2
|
+
dependencies: [
|
3
|
+
'punkt-segmenter',
|
4
|
+
'tactful_tokenizer'
|
5
|
+
],
|
6
|
+
workers: {
|
7
|
+
processors: {
|
8
|
+
segmenters: [:punkt],
|
9
|
+
tokenizers: []
|
10
|
+
}
|
11
|
+
},
|
12
|
+
stop_words:
|
13
|
+
[
|
14
|
+
"abans",
|
15
|
+
"aca",
|
16
|
+
"acerca",
|
17
|
+
"ahora",
|
18
|
+
"aixo",
|
19
|
+
"algo",
|
20
|
+
"algu",
|
21
|
+
"alguien",
|
22
|
+
"algun",
|
23
|
+
"alguna",
|
24
|
+
"algunas",
|
25
|
+
"algunes",
|
26
|
+
"alguno",
|
27
|
+
"algunos",
|
28
|
+
"alguns",
|
29
|
+
"alla",
|
30
|
+
"alli",
|
31
|
+
"allo",
|
32
|
+
"altra",
|
33
|
+
"altre",
|
34
|
+
"altres",
|
35
|
+
"amb",
|
36
|
+
"amunt",
|
37
|
+
"antes",
|
38
|
+
"aquel",
|
39
|
+
"aquell",
|
40
|
+
"aquella",
|
41
|
+
"aquellas",
|
42
|
+
"aquelles",
|
43
|
+
"aquellos",
|
44
|
+
"aquells",
|
45
|
+
"aquest",
|
46
|
+
"aquesta",
|
47
|
+
"aquestes",
|
48
|
+
"aquests",
|
49
|
+
"aqui",
|
50
|
+
"asimismo",
|
51
|
+
"aun",
|
52
|
+
"aunque",
|
53
|
+
"avall",
|
54
|
+
"cada",
|
55
|
+
"casi",
|
56
|
+
"com",
|
57
|
+
"como",
|
58
|
+
"con",
|
59
|
+
"cosas",
|
60
|
+
"coses",
|
61
|
+
"cual",
|
62
|
+
"cuales",
|
63
|
+
"cualquier",
|
64
|
+
"cuando",
|
65
|
+
"damunt",
|
66
|
+
"darrera",
|
67
|
+
"davant",
|
68
|
+
"debe",
|
69
|
+
"deben",
|
70
|
+
"deber",
|
71
|
+
"debia",
|
72
|
+
"debian",
|
73
|
+
"decia",
|
74
|
+
"decian",
|
75
|
+
"decir",
|
76
|
+
"deia",
|
77
|
+
"deien",
|
78
|
+
"del",
|
79
|
+
"demasiado",
|
80
|
+
"des",
|
81
|
+
"desde",
|
82
|
+
"despues",
|
83
|
+
"dicen",
|
84
|
+
"diciendo",
|
85
|
+
"dins",
|
86
|
+
"dir",
|
87
|
+
"diu",
|
88
|
+
"diuen",
|
89
|
+
"doncs",
|
90
|
+
"ell",
|
91
|
+
"ellas",
|
92
|
+
"elles",
|
93
|
+
"ells",
|
94
|
+
"els",
|
95
|
+
"encara",
|
96
|
+
"entonces",
|
97
|
+
"ese",
|
98
|
+
"esos",
|
99
|
+
"esser",
|
100
|
+
"esta",
|
101
|
+
"estan",
|
102
|
+
"estando",
|
103
|
+
"estant",
|
104
|
+
"estar",
|
105
|
+
"estaria",
|
106
|
+
"estarian",
|
107
|
+
"estarien",
|
108
|
+
"estas",
|
109
|
+
"estos",
|
110
|
+
"farien",
|
111
|
+
"feia",
|
112
|
+
"feien",
|
113
|
+
"fent",
|
114
|
+
"fue",
|
115
|
+
"fueron",
|
116
|
+
"gaire",
|
117
|
+
"gairebe",
|
118
|
+
"hace",
|
119
|
+
"hacia",
|
120
|
+
"hacian",
|
121
|
+
"haciendo",
|
122
|
+
"haran",
|
123
|
+
"hauria",
|
124
|
+
"haurien",
|
125
|
+
"hemos",
|
126
|
+
"hola",
|
127
|
+
"junto",
|
128
|
+
"lejos",
|
129
|
+
"les",
|
130
|
+
"lloc",
|
131
|
+
"los",
|
132
|
+
"menos",
|
133
|
+
"menys",
|
134
|
+
"meva",
|
135
|
+
"mias",
|
136
|
+
"mio",
|
137
|
+
"misma",
|
138
|
+
"mismas",
|
139
|
+
"mismo",
|
140
|
+
"mismos",
|
141
|
+
"molt",
|
142
|
+
"molta",
|
143
|
+
"moltes",
|
144
|
+
"mon",
|
145
|
+
"mucha",
|
146
|
+
"mucho",
|
147
|
+
"muy",
|
148
|
+
"nadie",
|
149
|
+
"ningu",
|
150
|
+
"nomes",
|
151
|
+
"nosaltres",
|
152
|
+
"nosotros",
|
153
|
+
"nostra",
|
154
|
+
"nostre",
|
155
|
+
"nuestra",
|
156
|
+
"nuestras",
|
157
|
+
"nuestro",
|
158
|
+
"nuestros",
|
159
|
+
"nunca",
|
160
|
+
"otra",
|
161
|
+
"pasa",
|
162
|
+
"pasan",
|
163
|
+
"pasara",
|
164
|
+
"pasaria",
|
165
|
+
"passara",
|
166
|
+
"passaria",
|
167
|
+
"passen",
|
168
|
+
"perque",
|
169
|
+
"poc",
|
170
|
+
"pocas",
|
171
|
+
"pocos",
|
172
|
+
"podem",
|
173
|
+
"poden",
|
174
|
+
"podeu",
|
175
|
+
"podria",
|
176
|
+
"podrian",
|
177
|
+
"podrien",
|
178
|
+
"poques",
|
179
|
+
"porque",
|
180
|
+
"potser",
|
181
|
+
"puc",
|
182
|
+
"pudieron",
|
183
|
+
"pudo",
|
184
|
+
"puede",
|
185
|
+
"pueden",
|
186
|
+
"puesto",
|
187
|
+
"qualsevol",
|
188
|
+
"quan",
|
189
|
+
"que",
|
190
|
+
"queria",
|
191
|
+
"querian",
|
192
|
+
"qui",
|
193
|
+
"quien",
|
194
|
+
"quienes",
|
195
|
+
"quiere",
|
196
|
+
"quieren",
|
197
|
+
"quin",
|
198
|
+
"quina",
|
199
|
+
"quines",
|
200
|
+
"quins",
|
201
|
+
"quizas",
|
202
|
+
"segueent",
|
203
|
+
"segun",
|
204
|
+
"sempre",
|
205
|
+
"seran",
|
206
|
+
"seria",
|
207
|
+
"serian",
|
208
|
+
"seu",
|
209
|
+
"seva",
|
210
|
+
"sido",
|
211
|
+
"siempre",
|
212
|
+
"siendo",
|
213
|
+
"siguiente",
|
214
|
+
"sino",
|
215
|
+
"sobretodo",
|
216
|
+
"solamente",
|
217
|
+
"sovint",
|
218
|
+
"suya",
|
219
|
+
"suyas",
|
220
|
+
"suyo",
|
221
|
+
"suyos",
|
222
|
+
"tambe",
|
223
|
+
"tambien",
|
224
|
+
"tanmateix",
|
225
|
+
"tanta",
|
226
|
+
"tanto",
|
227
|
+
"tendran",
|
228
|
+
"tendria",
|
229
|
+
"tendrian",
|
230
|
+
"tenen",
|
231
|
+
"teu",
|
232
|
+
"teva",
|
233
|
+
"tiene",
|
234
|
+
"tienen",
|
235
|
+
"tindran",
|
236
|
+
"tindria",
|
237
|
+
"tindrien",
|
238
|
+
"toda",
|
239
|
+
"todavia",
|
240
|
+
"todo",
|
241
|
+
"tota",
|
242
|
+
"totes",
|
243
|
+
"tras",
|
244
|
+
"traves",
|
245
|
+
"tuvieron",
|
246
|
+
"tuvo",
|
247
|
+
"tuya",
|
248
|
+
"tuyas",
|
249
|
+
"tuyo",
|
250
|
+
"tuyos",
|
251
|
+
"unas",
|
252
|
+
"unes",
|
253
|
+
"unos",
|
254
|
+
"uns",
|
255
|
+
"usaba",
|
256
|
+
"usaban",
|
257
|
+
"usada",
|
258
|
+
"usades",
|
259
|
+
"usado",
|
260
|
+
"usan",
|
261
|
+
"usando",
|
262
|
+
"usant",
|
263
|
+
"usar",
|
264
|
+
"usat",
|
265
|
+
"usava",
|
266
|
+
"usaven",
|
267
|
+
"usen",
|
268
|
+
"vaig",
|
269
|
+
"varem",
|
270
|
+
"varen",
|
271
|
+
"vareu",
|
272
|
+
"vegada",
|
273
|
+
"vegades",
|
274
|
+
"vez",
|
275
|
+
"volem",
|
276
|
+
"volen",
|
277
|
+
"voleu",
|
278
|
+
"vora",
|
279
|
+
"vos",
|
280
|
+
"vosaltres",
|
281
|
+
"vosotros",
|
282
|
+
"vostra",
|
283
|
+
"vostre",
|
284
|
+
"voy",
|
285
|
+
"vuestra",
|
286
|
+
"vuestras",
|
287
|
+
"vuestro",
|
288
|
+
"vuestros",
|
289
|
+
"vull"
|
290
|
+
]
|
291
|
+
}
|