treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Dutch
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:punkt],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,63 +0,0 @@
1
- class Treat::Languages::English
2
-
3
- RequiredDependencies = ['rbtagger', 'ruby-stemmer', 'tactful_tokenizer', 'nickel', 'rwordnet']
4
- OptionalDependencies = ['uea-stemmer', 'engtagger', 'active_support', 'english']
5
-
6
- Extractors = {
7
- :time => [:chronic, :ruby, :nickel],
8
- :topics => [:reuters],
9
- :topic_words => [:lda],
10
- :keywords => [:tf_idf],
11
- :name_tag => [:stanford],
12
- :coreferences => [:stanford],
13
- :roles => [:naive]
14
- }
15
-
16
- Inflectors = {
17
- :conjugators => [:linguistics],
18
- :declensors => [:english, :linguistics, :active_support],
19
- :stemmers => [:porter, :porter_c, :uea],
20
- :ordinalizers => [:linguistics],
21
- :cardinalizers => [:linguistics]
22
- }
23
-
24
- Lexicalizers = {
25
- :categorizers => [:from_tag],
26
- :taggers => [:lingua, :brill, :stanford],
27
- :sensers => [:wordnet]
28
- }
29
-
30
- Processors = {
31
- :chunkers => [:txt],
32
- :parsers => [:stanford, :enju],
33
- :segmenters => [:tactful, :punkt, :stanford],
34
- :tokenizers => [:ptb, :stanford, :tactful, :punkt]
35
- }
36
-
37
- Retrievers = {
38
- :searchers => [:ferret],
39
- :indexers => [:ferret]
40
- }
41
-
42
- CommonWords = [
43
- 'the', 'of', 'and', 'a', 'to', 'in', 'is',
44
- 'you', 'that', 'it', 'he', 'was', 'for', 'on',
45
- 'are', 'as', 'with', 'his', 'they', 'I', 'at',
46
- 'be', 'this', 'have', 'from', 'or', 'one', 'had',
47
- 'by', 'word', 'but', 'not', 'what', 'all', 'were',
48
- 'we', 'when', 'your', 'can', 'said', 'there', 'use',
49
- 'an', 'each', 'which', 'she', 'do', 'how', 'their',
50
- 'if', 'will', 'up', 'other', 'about', 'out', 'many',
51
- 'then', 'them', 'these', 'so', 'some', 'her', 'would',
52
- 'make', 'like', 'him', 'into', 'time', 'has', 'look',
53
- 'two', 'more', 'write', 'go', 'see', 'number', 'no',
54
- 'way', 'could', 'people', 'my', 'than', 'first', 'been',
55
- 'call', 'who', 'its', 'now', 'find', 'long', 'down',
56
- 'day', 'did', 'get', 'come', 'made', 'may', 'part',
57
- 'say', 'also', 'new', 'much', 'should', 'still',
58
- 'such', 'before', 'after', 'other', 'then', 'over',
59
- 'under', 'therefore', 'nonetheless', 'thereafter',
60
- 'afterwards', 'here', 'huh', 'hah', "n't", "'t", 'here'
61
- ]
62
-
63
- end
@@ -1,20 +0,0 @@
1
- class Treat::Languages::French
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {
9
- :taggers => [:stanford],
10
- :categorizers => [:from_tag]
11
- }
12
- Processors = {
13
- :chunkers => [:txt],
14
- :parsers => [:stanford],
15
- :segmenters => [:tactful],
16
- :tokenizers => [:tactful]
17
- }
18
- Retrievers = {}
19
-
20
- end
@@ -1,20 +0,0 @@
1
- class Treat::Languages::German
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {
9
- :taggers => [:stanford],
10
- :categorizers => [:from_tag]
11
- }
12
- Processors = {
13
- :chunkers => [:txt],
14
- :parsers => [:stanford],
15
- :segmenters => [:tactful],
16
- :tokenizers => [:tactful]
17
- }
18
- Retrievers = {}
19
-
20
- end
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Greek
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:punkt],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,17 +0,0 @@
1
- class Treat::Languages::Italian
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :parsers => [:stanford],
12
- :segmenters => [:punkt],
13
- :tokenizers => [:tactful]
14
- }
15
- Retrievers = {}
16
-
17
- end
@@ -1,10 +0,0 @@
1
- class Treat::Languages::Language
2
-
3
- WordCategories = [
4
- :adjective, :adverb, :noun, :verb, :interjection,
5
- :clitic, :coverb, :conjunction, :determiner, :particle,
6
- :preposition, :pronoun, :number, :symbol, :punctuation,
7
- :complementizer
8
- ]
9
-
10
- end
@@ -1,504 +0,0 @@
1
- aar,aa,afar,afar
2
- abk,ab,abkhazian,abkhaze
3
- ace,,achinese,aceh
4
- ach,,acoli,acoli
5
- ada,,adangme,adangme
6
- ady,,adyghe,adyghé
7
- afa,,afro-asiatic languages,afro-asiatique
8
- afh,,afrihili,afrihili
9
- afr,af,afrikaans,afrikaans
10
- ain,,ainu,aïnou
11
- aka,ak,akan,akan
12
- akk,,akkadian,akkadien
13
- alb,sq,albanian,albanais
14
- ale,,aleut,aléoute
15
- alg,,algonquian languages,algonquine
16
- alt,,southern altai,altai du sud
17
- amh,am,amharic,amharique
18
- ang,,old english,anglo-saxon
19
- anp,,angika,angika
20
- apa,,apache languages,apache
21
- ara,ar,arabic,arabe
22
- arc,,official aramaic|imperial aramaic,araméen d'empire
23
- arg,an,aragonese,aragonais
24
- arm,hy,armenian,arménien
25
- arn,,mapudungun,mapudungun|mapuce
26
- arp,,arapaho,arapaho
27
- art,,artificial languages,artificielle
28
- arw,,arawak,arawak
29
- asm,as,assamese,assamais
30
- ast,,asturian|leonese,asturien|léonais
31
- ath,,athapascan languages,athapascane
32
- aus,,australian languages,australien
33
- ava,av,avaric,avar
34
- ave,ae,avestan,avestique
35
- awa,,awadhi,awadhi
36
- aym,ay,aymara,aymara
37
- aze,az,azerbaijani,azéri
38
- bad,,banda languages,banda
39
- bai,,bamileke languages,bamiléké
40
- bak,ba,bashkir,bachkir
41
- bal,,baluchi,baloutchi
42
- bam,bm,bambara,bambara
43
- ban,,balinese,balinais
44
- baq,eu,basque,basque
45
- bas,,basa,basa
46
- bat,,baltic languages,balte
47
- bej,,beja,bedja
48
- bel,be,belarusian,biélorusse
49
- bem,,bemba,bemba
50
- ben,bn,bengali,bengali
51
- ber,,berber languages,berbère
52
- bho,,bhojpuri,bhojpuri
53
- bih,bh,bihari languages,langues biharis
54
- bik,,bikol,bikol
55
- bin,,bini,bini
56
- bis,bi,bislama,bichlamar
57
- bla,,siksika,blackfoot
58
- bnt,,bantu languages,bantou
59
- tib,bo,tibetan,tibétain
60
- bos,bs,bosnian,bosniaque
61
- bra,,braj,braj
62
- bre,br,breton,breton
63
- btk,,batak languages,batak
64
- bua,,buriat,bouriate
65
- bug,,buginese,bugi
66
- bul,bg,bulgarian,bulgare
67
- bur,my,burmese,birman
68
- byn,,blin,blin
69
- cad,,caddo,caddo
70
- cai,,central american indian languages,amérindiennes de l'amérique centrale
71
- car,,galibi carib,karib|carib
72
- cat,ca,catalan,catalan
73
- cau,,caucasian languages,caucasien
74
- ceb,,cebuano,cebuano
75
- cel,,celtic languages,celtique
76
- cze cs,czech,tchèque
77
- cha,ch,chamorro,chamorro
78
- chb,,chibcha,chibcha
79
- che,ce,chechen,tchétchène
80
- chg,,chagatai,djaghataï
81
- chi,zh,chinese,chinois
82
- chk,,chuukese,chuuk
83
- chm,,mari,mari
84
- chn,,chinook,chinook
85
- cho,,choctaw,choctaw
86
- chp,,chipewyan suline,chipewyan
87
- chr,,cherokee,cherokee
88
- chu,cu,church slavic slavonic slavonic bulgarian church slavonic,slavon d'église slave naturgique bulgare
89
- chv,cv,chuvash,tchouvache
90
- chy,,cheyenne,cheyenne
91
- cmc,,chamic languages,chame
92
- cop,,coptic,copte
93
- cor,kw,cornish,cornique
94
- cos,co,corsican,corse
95
- cpe,,creole,créole
96
- cpf,,creole,créole
97
- cpp,,creole,créole
98
- cre,cr,cree,cree
99
- crh,,crimean tatar turkish,tatar de crimé
100
- crp,,creoles and pidgins,créoles et pidgins
101
- csb,,kashubian,kachoube
102
- cus,,cushitic languages,couchitique
103
- wel,cy,welsh,gallois
104
- cze,cs,czech,tchèque
105
- dak,,dakota,dakota
106
- dan,da,danish,danois
107
- dar,,dargwa,dargwa
108
- day,,land dayak languages,dayak
109
- del,,delaware,delaware
110
- den,,slave,esclave
111
- ger,de,german,allemand
112
- dgr,,dogrib,dogrib
113
- din,,dinka,dinka
114
- div,dv,divehi|maldivian,maldivien
115
- doi,,dogri,dogri
116
- dra,,dravidian languages,dravidien
117
- dsb,,lower sorbian,bas-sorabe
118
- dua,,duala,douala
119
- dum,,middle dutch,néerlandais moyen
120
- dut,nl,dutch,néerlandais
121
- dyu,,dyula,dioula
122
- dzo,dz,dzongkha,dzongkha
123
- efi,,efik,efik
124
- egy,,egyptian,égyptien
125
- eka,,ekajuk,ekajuk
126
- gre,el,greek,grec
127
- elx,,elamite,élamite
128
- eng,en,english,anglais
129
- enm,,middle english,anglais moyen
130
- epo,eo,esperanto,espéranto
131
- est,et,estonian,estonien
132
- baq,eu,basque,basque
133
- ewe,ee,ewe,éwé
134
- ewo,,ewondo,éwondo
135
- fan,,fang,fang
136
- fao,fo,faroese,féroïen
137
- per,fa,persian,persan
138
- fat,,fanti,fanti
139
- fij,fj,fijian,fidjien
140
- fil,,filipino,filipino
141
- fin,fi,finnish,finnois
142
- fiu,,finno-ugrian languages,finno-ougrien
143
- fon,,fon,fon
144
- fre,fr,french,français
145
- frm,,middle french,français moyen
146
- fro,,old french,français ancien
147
- frr,,northern frisian,frison septentrional
148
- frs,,eastern frisian,frison oriental
149
- fry,fy,western frisian,frison occidental
150
- ful,ff,fulah,peul
151
- fur,,friulian,frioulan
152
- gaa,,ga,ga
153
- gay,,gayo,gayo
154
- gba,,gbaya,gbaya
155
- gem,,germanic languages,germanique
156
- geo,ka,georgian,géorgien
157
- ger,de,german,allemand
158
- gez,,geez,guèze
159
- gil,,gilbertese,kiribati
160
- gla,gd,gaelic gaelic,gaélique écossais
161
- gle,ga,irish,irlandais
162
- glg,gl,galician,galicien
163
- glv,gv,manx,manx
164
- gmh,,middle high german,moyen haut allemand
165
- goh,,old high german,vieux haut allemand
166
- gon,,gondi,gond
167
- gor,,gorontalo,gorontalo
168
- got,,gothic,gothique
169
- grb,,grebo,grebo
170
- grc,,ancient greek,grec ancien
171
- gre,el,greek,grec
172
- grn,gn,guarani,guarani
173
- gsw,,swiss german|alsatian,suisse alémanique|alsacien
174
- guj,gu,gujarati,goudjrati
175
- gwi,,gwich'in,gwich'in
176
- hai,,haida,haida
177
- hat,ht,haitian creole,haïtien haïtien
178
- hau,ha,hausa,haoussa
179
- haw,,hawaiian,hawaïen
180
- heb,he,hebrew,hébreu
181
- her,hz,herero,herero
182
- hil,,hiligaynon,hiligaynon
183
- him,,himachali languages pahari languages,langues himachalis paharis occidentales
184
- hin,hi,hindi,hindi
185
- hit,,hittite,hittite
186
- hmn,,hmong,hmong
187
- hmo,ho,hiri motu,hiri motu
188
- hrv,hr,croatian,croate
189
- hsb,,upper sorbian,haut-sorabe
190
- hun,hu,hungarian,hongrois
191
- hup,,hupa,hupa
192
- arm,hy,armenian,arménien
193
- iba,,iban,iban
194
- ibo,ig,igbo,igbo
195
- ice,is,icelandic,islandais
196
- ido,io,ido,ido
197
- iii,ii,sichuan yi,yi de sichuan
198
- ijo,,ijo languages,ijo
199
- iku,iu,inuktitut,inuktitut
200
- ile,ie,interlingue,interlingue
201
- ilo,,iloko,ilocano
202
- ina,ia,interlingua,interlingua
203
- inc,,indic languages,indo-aryen
204
- ind,id,indonesian,indonésien
205
- ine,,indo-european languages,indo-européen
206
- inh,,ingush,ingouche
207
- ipk,ik,inupiaq,inupiaq
208
- ira,,iranian languages,iranien
209
- iro,,iroquoian languages,iroquoise
210
- ice,is,icelandic,islandais
211
- ita,it,italian,italien
212
- jav,jv,javanese,javanais
213
- jbo,,lojban,lojban
214
- jpn,ja,japanese,japonais
215
- jpr,,judeo-persian,judéo-persan
216
- jrb,,judeo-arabic,judéo-arabe
217
- kaa,,kara-kalpak,karakalpak
218
- kab,,kabyle,kabyle
219
- kac,,kachin,kachin
220
- kal,kl,kalaallisut,groenlandais
221
- kam,,kamba,kamba
222
- kan,kn,kannada,kannada
223
- kar,,karen languages,karen
224
- kas,ks,kashmiri,kashmiri
225
- geokat,ka,georgian,géorgien
226
- kau,kr,kanuri,kanouri
227
- kaw,,kawi,kawi
228
- kaz,kk,kazakh,kazakh
229
- kbd,,kabardian,kabardien
230
- kha,,khasi,khasi
231
- khi,,khoisan languages,khoïsan
232
- khm,km,central khmer,khmer central
233
- kho,,khotanese,khotanais
234
- kik,ki,kikuyu,kikuyu
235
- kin,rw,kinyarwanda,rwanda
236
- kir,ky,kirghiz,kirghiz
237
- kmb,,kimbundu,kimbundu
238
- kok,,konkani,konkani
239
- kom,kv,komi,kom
240
- kon,kg,kongo,kongo
241
- kor,ko,korean,coréen
242
- kos,,kosraean,kosrae
243
- kpe,,kpelle,kpellé
244
- krc,,karachay-balkar,karatchai balkar
245
- krl,,karelian,carélien
246
- kro,,kru languages,krou
247
- kru,,kurukh,kurukh
248
- kua,kj,kuanyama,kuanyama
249
- kum,,kumyk,koumyk
250
- kur,ku,kurdish,kurde
251
- kut,,kutenai,kutenai
252
- lad,,ladino,judéo-espagnol
253
- lah,,lahnda,lahnda
254
- lam,,lamba,lamba
255
- lao,lo,lao,lao
256
- lat,la,latin,latin
257
- lav,lv,latvian,letton
258
- lez,,lezghian,lezghien
259
- lim,li,limburgan|limburgish,limbourgeois
260
- lin,ln,lingala,lingala
261
- nat,lt,lithuanian,lituanien
262
- lol,,mongo,mongo
263
- loz,,lozi,lozi
264
- ltz,lb,luxembourgish,luxembourgeois
265
- lua,,luba-lulua,luba-lulua
266
- lub,lu,luba-katanga,luba-katanga
267
- lug,lg,ganda,ganda
268
- lui,,luiseno,luiseno
269
- lun,,lunda,lunda
270
- luo,,luo,luo
271
- lus,,lushai,lushai
272
- mac,mk,macedonian,macédonien
273
- mad,,madurese,madourais
274
- mag,,magahi,magahi
275
- mah,mh,marshallese,marshall
276
- mai,,maithili,maithili
277
- mak,,makasar,makassar
278
- mal,ml,malayalam,malayalam
279
- man,,mandingo,mandingue
280
- mao,mi,maori,maori
281
- map,,austronesian languages,austronésien
282
- mar,mr,marathi,marathe
283
- mas,,masai,massaï
284
- may,ms,malay,malais
285
- mdf,,moksha,moksa
286
- mdr,,mandar,mandar
287
- men,,mende,mendé
288
- mga,,middle irish,irlandais moyen
289
- mic,,mi'kmaq,mi'kmaq
290
- min,,minangkabau,minangkabau
291
- mis,,uncoded languages,langues non codées
292
- mac,mk,macedonian,macédonien
293
- mkh,,mon-khmer languages,môn-khmer
294
- mlg,mg,malagasy,malgache
295
- mlt,mt,maltese,maltais
296
- mnc,,manchu,mandchou
297
- mni,,manipuri,manipuri
298
- mno,,manobo languages,manobo
299
- moh,,mohawk,mohawk
300
- mon,mn,mongolian,mongol
301
- mos,,mossi,moré
302
- mao,mi,maori,maori
303
- may,ms,malay,malais
304
- mul,,multiple languages,multilingue
305
- mun,,munda languages,mounda
306
- mus,,creek,muskogee
307
- mwl,,mirandese,mirandais
308
- mwr,,marwari,marvari
309
- bur,my,burmese,birman
310
- myn,,mayan languages,maya
311
- myv,,erzya,erza
312
- nah,,nahuatl languages,nahuatl
313
- nai,,north american indian languages,nord-amérindien
314
- nap,,neaponatan,napolitain
315
- nau,na,nauru,nauruan
316
- nav,nv,navajo,navaho
317
- nbl,nr,south ndebele,ndébélé du sud
318
- nde,nd,north ndebele,ndébélé du nord
319
- ndo,ng,ndonga,ndonga
320
- nds,,low german saxon|,bas allemand saxon
321
- nep,ne,nepali,népalais
322
- new,,nepal bhasa,nepal bhasa
323
- nia,,nias,nias
324
- nic,,niger-kordofanian languages,nigéro-kordofanien
325
- niu,,niuean,niué
326
- dut,nl,dutch,néerlandais
327
- nno,nn,nynorsk norwegian,norvégien nynorsk
328
- nob,nb,bokmål norwegian,norvégien bokmål
329
- nog,,nogai,nogaï
330
- non,,old norse,vieux norrois
331
- nor,no,norwegian,norvégien
332
- nqo,,n'ko,n'ko
333
- nso,,pedi|northern sotho,pedi|sotho du nord
334
- nub,,nubian languages,nubien
335
- nwc,,classical newari newari nepal bhasa,newari classique
336
- nya,ny,chichewa|nyanja,chichewa|nyanja
337
- nym,,nyamwezi,nyamwezi
338
- nyn,,nyankole,nyankolé
339
- nyo,,nyoro,nyoro
340
- nzi,,nzima,nzema
341
- oci,oc,occitan,occitan
342
- oji,oj,ojibwa,ojibwa
343
- ori,or,oriya,oriya
344
- orm,om,oromo,galla
345
- osa,,osage,osage
346
- oss,os,ossetian,ossète
347
- ota,,ottoman turkish,turc ottoman
348
- oto,,otomian languages,otomi
349
- paa,,papuan languages,papoue
350
- pag,,pangasinan,pangasinan
351
- pal,,pahlavi,pahlavi
352
- pam,,pampanga,pampangan
353
- pan,pa,panjabi,pendjabi
354
- pap,,papiamento,papiamento
355
- pau,,palauan,palau
356
- peo,,old persian,vieux perse
357
- per,,farsi,persan
358
- phi,,philippine languages,philippine
359
- phn,,phoenician,phénicien
360
- pli,pi,pali,pali
361
- pny,,pinyin,pinyin
362
- pol,pl,polish,polonais
363
- pon,,pohnpeian,pohnpei
364
- por,pt,portuguese,portugais
365
- pra,,prakrit languages,prâkrit
366
- pro,,old provençal occitan,provençal ancien ancien
367
- pus,ps,pushto,pachto
368
- qaa-qtz,,reserved for local use,réservée à l'usage local
369
- que,qu,quechua,quechua
370
- raj,,rajasthani,rajasthani
371
- rap,,rapanui,rapanui
372
- rar,,rarotongan islands maori,rarotonga des îles cook
373
- roa,,romance languages,romane
374
- roh,rm,romansh,romanche
375
- rom,,romany,tsigane
376
- rum,ro,romanian|moldovan,roumain
377
- rum,ro,romanian|moldovan,roumain
378
- run,rn,rundi,rundi
379
- rup,,aromanian|macedo-romanian,aroumain-roumain
380
- rus,ru,russian,russe
381
- sad,,sandawe,sandawe
382
- sag,sg,sango,sango
383
- sah,,yakut,iakoute
384
- sai,,south american indian languages,sud-amérindien
385
- sal,,salishan languages,salishen
386
- sam,,samaritan aramaic,samaritain
387
- san,sa,sanskrit,sanskrit
388
- sas,,sasak,sasak
389
- sat,,santali,santal
390
- scn,,sicilian,sicilien
391
- sco,,scots,écossais
392
- sel,,selkup,selkoupe
393
- sem,,semitic languages,sémitique
394
- sga,,old irish,irlandais ancien
395
- sgn,,sign languages,langues des signes
396
- shn,,shan,chan
397
- sid,,sidamo,sidamo
398
- sin,si,sinhala,singhalais
399
- sio,,siouan languages,sioux
400
- sit,,sino-tibetan languages,sino-tibétaine
401
- sla,,slavic languages,slave
402
- slo,sk,slovak,slovaque
403
- slv,sl,slovenian,slovène
404
- sma,,southern sami,sami du sud
405
- sme,se,northern sami,sami du nord
406
- smi,,sami languages,same
407
- smj,,lule sami,sami de lule
408
- smn,,inari sami,sami d'inari
409
- smo,sm,samoan,samoan
410
- sms,,skolt sami,sami skolt
411
- sna,sn,shona,shona
412
- snd,sd,sindhi,sindhi
413
- snk,,soninke,soninké
414
- sog,,sogdian,sogdien
415
- som,so,somali,somali
416
- son,,songhai languages,songhai
417
- sot,st,southern sotho,sotho du sud
418
- spa,es,spanish,espagnol
419
- alb,sq,albanian,albanais
420
- srd,sc,sardinian,sarde
421
- srn,,sranan tongo,sranan tongo
422
- srp,sr,serbian,serbe
423
- srr,,serer,sérère
424
- ssa,,nilo-saharan languages,nilo-saharien
425
- ssw,ss,swati,swati
426
- suk,,sukuma,sukuma
427
- sun,su,sundanese,soundanais
428
- sus,,susu,soussou
429
- sux,,sumerian,sumérien
430
- swa,sw,swahili,swahili
431
- swe,sv,swedish,suédois
432
- syc,,classical syriac,syriaque classique
433
- syr,,syriac,syriaque
434
- tah,ty,tahitian,tahitien
435
- tai,,tai languages,tai
436
- tam,ta,tamil,tamoul
437
- tat,tt,tatar,tatar
438
- tel,te,telugu,télougou
439
- tem,,timne,temne
440
- ter,,tereno,tereno
441
- tet,,tetum,tetum
442
- tgk,tg,tajik,tadjik
443
- tgl,tl,tagalog,tagalog
444
- tha,th,thai,thaï
445
- tib,bo,tibetan,tibétain
446
- tig,,tigre,tigré
447
- tir,ti,tigrinya,tigrigna
448
- tiv,,tiv,tiv
449
- tkl,,tokelau,tokelau
450
- tlh,,klingon-hol,klingon
451
- tli,,tlingit,tlingit
452
- tmh,,tamashek,tamacheq
453
- tog,,tonga,tonga
454
- ton,to,tonga,tongan
455
- tpi,,tok pisin,tok pisin
456
- tsi,,tsimshian,tsimshian
457
- tsn,tn,tswana,tswana
458
- tso,ts,tsonga,tsonga
459
- tuk,tk,turkmen,turkmène
460
- tum,,tumbuka,tumbuka
461
- tup,,tupi languages,tupi
462
- tur,tr,turkish,turc
463
- tut,,altaic languages,altaïque
464
- tvl,,tuvalu,tuvalu
465
- twi,tw,twi,twi
466
- tyv,,tuvinian,touva
467
- udm,,udmurt,oudmourte
468
- uga,,ugaritic,ougaritique
469
- uig,ug,uighur,ouïgour
470
- ukr,uk,ukrainian,ukrainien
471
- umb,,umbundu,umbundu
472
- und,,undetermined,indéterminée
473
- urd,ur,urdu,ourdou
474
- uzb,uz,uzbek,ouszbek
475
- vai,,vai,vaï
476
- ven,ve,venda,venda
477
- vie,vi,vietnamese,vietnamien
478
- vol,vo,volapük,volapük
479
- vot,,votic,vote
480
- wak,,wakashan languages,wakashane
481
- wal,,wolaitta,wolaitta
482
- war,,waray,waray
483
- was,,washo,washo
484
- wel,cy,welsh,gallois
485
- wen,,sorbian languages,sorabe
486
- wln,wa,walloon,wallon
487
- wol,wo,wolof,wolof
488
- xal,,kalmyk,kalmouk
489
- xho,xh,xhosa,xhosa
490
- yao,,yao,yao
491
- yap,,yapese,yapois
492
- yid,yi,yiddish,yiddish
493
- yor,yo,yoruba,yoruba
494
- ypk,,yupik languages,yupik
495
- zap,,zapotec,zapotèque
496
- zbl,,blissymbols|bliss,symboles bliss
497
- zen,,zenaga,zenaga
498
- zha,za,zhuang,zhuang
499
- chi,zh,chinese,chinois
500
- znd,,zande languages,zandé
501
- zul,zu,zulu,zoulou
502
- zun,,zuni,zuni
503
- zxx,,no linguistic content applicable,pas de contenu linguistique applicable
504
- zza,,zaza|dimli|kirmanjki,zaza|dimli|kirmanjki
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Polish
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:tactful],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Portuguese
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:tactful],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end