treat 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,26 @@
1
module Treat
  # Mixin that lets any node of a tree record tokens in, and read tokens
  # from, a single registry stored on the tree's root node.
  #
  # The including class must respond to #is_root? and #has_parent? and
  # keep its parent node in @parent, since those are what the methods
  # below rely on.
  module Registrable
    # Adds +token+ to the root node's registry, indexed both by token.id
    # and by token.value (several tokens may share one value). When called
    # on a non-root node the request is forwarded upwards through @parent.
    def register_token(token)
      return @parent.register_token(token) unless is_root?
      @token_registry ||= {value: {}, id: {}}
      @token_registry[:id][token.id] = token
      same_value = (@token_registry[:value][token.value] ||= [])
      same_value << token
    end

    # Returns the registry hash held by the root node, creating an empty
    # one ({value: {}, id: {}}) the first time it is asked for.
    def token_registry
      return @parent.token_registry if has_parent?
      @token_registry ||= {value: {}, id: {}}
    end
  end
end
@@ -0,0 +1,10 @@
1
module Treat
  # Namespace for the static resource files of the library. Loading this
  # file pulls in each resource listed below.
  module Resources
    # Order matters: 'categories' reads Treat::Resources::Tags while it is
    # being loaded, so 'tags' has to be required before it.
    %w[delegates dependencies edges languages tags categories].each do |resource|
      require "treat/resources/#{resource}"
    end
  end
end
@@ -0,0 +1,18 @@
1
module Treat
  module Resources
    # Word categories, plus a lookup table from word tags to categories.
    class Categories
      # Every category symbol listed for this library.
      List = [
        :adjective, :adverb, :noun, :verb, :interjection,
        :clitic, :coverb, :conjunction, :determiner, :particle,
        :preposition, :pronoun, :number, :symbol, :punctuation,
        :complementizer
      ]

      # Maps each word tag to a category symbol. Tags::AlignedWordTags is
      # read as consecutive (description, tags) pairs; the category is the
      # leading word of the description (the text before the first
      # whitespace or comma), lowercased.
      WordTagToCategory =
        Treat::Resources::Tags::AlignedWordTags.each_slice(2).each_with_object({}) do |(description, tags), table|
          category = description.gsub(',', ' ,').split(' ')[0].downcase
          tags.each { |tag| table[tag] = category.intern }
        end
    end
  end
end
@@ -0,0 +1,96 @@
1
module Treat
  module Resources
    # Per-language tables of delegates. Every class below exposes four
    # constants (Extractors, Inflectors, Lexicalizers, Processors), each a
    # hash from a group name to the list of delegate names for that group.
    module Delegates
      class English
        Extractors = {
          :time => [:chronic],
          :topics => [:reuters],
          :topic_words => [:lda],
          :key_sentences => [:topics_frequency]
        }
        Processors = {
          :chunkers => [:txt],
          :parsers => [:enju, :stanford],
          :segmenters => [:tactful, :punkt, :stanford],
          :tokenizers => [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
        }
        Lexicalizers = {
          :category => [:from_tag],
          :linkages => [:naive],
          :synsets => [:wordnet, :rita_wn],
          :tag => [:brill, :lingua, :stanford]
        }
        # NOTE(review): :english is listed for declensors, while the only
        # English declensor in this package's file list is
        # inflectors/declensors/en.rb -- confirm the name resolves.
        Inflectors = {
          :conjugators => [:linguistics],
          :declensors => [:linguistics, :english],
          :lemmatizers => [:e_lemma],
          :stemmers => [:porter_c, :porter, :uea],
          :ordinal_words => [:linguistics],
          :cardinal_words => [:linguistics]
        }
      end

      class German
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {
          :tag => [:stanford]
        }
        Processors = {
          :chunkers => [:txt],
          :parsers => [:stanford],
          :segmenters => [:tactful, :punkt, :stanford],
          :tokenizers => [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
        }
      end

      class French
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {
          :tag => [:stanford]
        }
        Processors = {
          :chunkers => [:txt],
          :parsers => [:stanford],
          :segmenters => [:tactful, :punkt, :stanford],
          :tokenizers => [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
        }
      end

      class Italian
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {}
        Processors = {
          :chunkers => [:txt],
          :segmenters => [:tactful, :punkt, :stanford],
          :tokenizers => [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
        }
      end

      class Arabic
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {
          :tag => [:stanford]
        }
        Processors = {
          :parsers => [:stanford]
        }
      end

      class Chinese
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {
          :tag => [:stanford]
        }
        Processors = {}
      end

      class Xinhua
        Extractors = {}
        Inflectors = {}
        Lexicalizers = {}
        Processors = {
          :parsers => [:stanford]
        }
      end
    end
  end
end
File without changes
@@ -0,0 +1,8 @@
1
+ # http://nlp.stanford.edu/software/dependencies_manual.pdf
2
+
3
+ =begin
4
+ ENJU
5
+
6
+ pred: noun_arg0, noun_arg1, noun_arg2, noun_arg12, it_arg1, there_arg0, quote_arg2, quote_arg12, quote_arg23, quote_arg123, poss_arg2, poss_arg12, aux_arg12, aux_mod_arg12, verb_arg1, verb_arg12, verb_arg123, verb_arg1234, verb_mod_arg1, verb_mod_arg12, verb_mod_arg123, verb_mod_arg1234, adj_arg1, adj_arg12, adj_mod_arg1, adj_mod_arg12, conj_arg1, conj_arg12, conj_arg123, coord_arg12, det_arg1, prep_arg12, prep_arg123, prep_mod_arg12, prep_mod_arg123, lgs_arg2, dtv_arg2, punct_arg1, app_arg12, lparen_arg123, rparen_arg0, comp_arg1, comp_arg12, comp_mod_arg1, relative_arg1, relative_arg12
7
+
8
+ =end
@@ -0,0 +1,23 @@
1
module Treat
  module Resources
    module Format

      # Helpers for documents in XML format.
      class XML
        require 'nokogiri'

        # Validates part of an XML file against an XML schema.
        #
        # document_path - path of the XML file to check.
        # schema_path   - path of the schema file.
        # root_element  - name of the element to validate; the first
        #                 element with that name anywhere in the file
        #                 is used.
        #
        # Returns the array of validation errors reported by Nokogiri
        # (empty when the element is valid).
        # Raises ArgumentError when the file has no such element.
        def self.validate(document_path, schema_path, root_element)
          schema = Nokogiri::XML::Schema(File.read(schema_path))
          document = Nokogiri::XML(File.read(document_path))
          element = document.xpath("//#{root_element}").first
          if element.nil?
            raise ArgumentError,
            "No <#{root_element}> element found in #{document_path}."
          end
          # Schema#validate takes a Nokogiri::XML::Document (or a file
          # name), not a string of markup, so the selected element is
          # re-parsed into a document of its own before validating.
          schema.validate(Nokogiri::XML(element.to_xml))
        end
        # The class body previously called validate('input.xml',
        # 'schema.xdf', 'container') and printed the errors; that call
        # ran whenever this file was loaded and has been removed.
      end

      class HTML < XML

      end

    end
  end
end
@@ -0,0 +1,86 @@
1
module Treat
  module Resources
    # Dictionary of ISO-639-1 and ISO-639-2 language codes, together with
    # their full-text descriptions in English and French. The tables are
    # read on first use from treat/resources/languages.txt under Treat.lib.
    module Languages
      ISO639_1 = 1
      ISO639_2 = 2

      # Becomes true once get_languages has filled the lookup tables.
      @@loaded = false

      # Describe a language, given as an ISO-639-1 or ISO-639-2 code or as
      # a full name, in English or in French.
      #
      # lang      - code or name of the language to describe (not nil).
      # desc_lang - language of the description: :en, :eng, :english or
      #             :anglais for English (default); :fr, :fra, :fre,
      #             :french or :francais for French.
      #
      # Returns the description as a symbol.
      # Raises RuntimeError when lang is nil, and Treat::Exception when
      # desc_lang is not supported or lang cannot be found.
      def self.describe(lang, desc_lang = :en)
        raise "Must provide a non-nil language identifier to describe." if lang.nil?
        lang = find(lang).to_s
        if [:en, :eng, :english, :anglais].include?(desc_lang)
          l = @@english_full.key(lang)
        elsif [:fr, :fra, :fre, :french, :francais].include?(desc_lang)
          l = @@french_full.key(lang)
        else
          raise Treat::Exception,
          "Unknown language to describe: #{desc_lang}."
        end
        not_found(lang) if l.nil?
        l.intern
      end

      # Raise a Treat::Exception saying that a language code or
      # description does not exist. The text returned by did_you_mean?
      # (defined outside this module) for the known codes and names is
      # appended to the message.
      def self.not_found(lang)
        msg = "Language '#{lang}' does not exist."
        all = @@iso639_2.keys + @@iso639_1.keys +
        @@english_full.keys + @@french_full.keys
        msg += did_you_mean?(all, lang)
        raise Treat::Exception, msg
      end

      # Find a language by ISO-639-1 or ISO-639-2 code or by full name
      # (in English or French).
      #
      # lang - code or name to look up; compared case-insensitively.
      # rc   - ISO639_2 (default) or ISO639_1: the kind of code to return.
      #
      # Returns the requested code as a lowercase symbol, or nil when rc
      # is neither ISO639_1 nor ISO639_2.
      # Raises RuntimeError when lang is nil and Treat::Exception when the
      # language is unknown.
      def self.find(lang, rc = ISO639_2)
        raise "Must provide a non-nil language identifier to describe." if lang.nil?
        get_languages
        lang = lang.to_s.downcase
        if @@iso639_1.has_key?(lang)
          return :"#{lang}" if rc == ISO639_1
          return :"#{@@iso639_1[lang]}" if rc == ISO639_2
        elsif @@iso639_2.has_key?(lang)
          return :"#{lang}" if rc == ISO639_2
          return :"#{@@iso639_2[lang]}" if rc == ISO639_1
        elsif @@english_full.has_key?(lang)
          return :"#{@@english_full[lang]}" if rc == ISO639_2
          return :"#{@@iso639_2[@@english_full[lang]]}" if rc == ISO639_1
        elsif @@french_full.has_key?(lang)
          return :"#{@@french_full[lang]}" if rc == ISO639_2
          # The name tables hold ISO-639-2 codes, so the two-letter code
          # is reached through @@iso639_2, exactly as for English names.
          return :"#{@@iso639_2[@@french_full[lang]]}" if rc == ISO639_1
        else
          not_found(lang)
        end
      end

      # Load the language tables from the dictionary file. Does nothing
      # once the tables have been loaded.
      #
      # Each line is read as "iso639_2,iso639_1,english names,french
      # names", where alternative names are separated by '|'.
      def self.get_languages
        return if @@loaded
        @@iso639_1 = {}; @@iso639_2 = {}
        @@english_full = {}; @@french_full = {}
        path = Treat.lib + '/treat/resources/languages.txt'
        # The dictionary holds accented names; read it as UTF-8 whatever
        # the default external encoding of the process is.
        File.readlines(path, encoding: 'UTF-8').each do |language|
          iso639_2, iso639_1, english_desc, french_desc =
          language.chomp.split(',')
          iso639_2 = iso639_2.to_s.strip
          next if iso639_2.empty? # blank line
          iso639_1 = iso639_1.to_s.strip
          # Many languages have no two-letter code. Do not register the
          # empty string as a code, and do not let a row without a
          # two-letter code erase one recorded by an earlier row.
          @@iso639_1[iso639_1] = iso639_2 unless iso639_1.empty?
          unless iso639_1.empty? && @@iso639_2.has_key?(iso639_2)
            @@iso639_2[iso639_2] = iso639_1
          end
          register_descriptions(@@english_full, english_desc, iso639_2)
          register_descriptions(@@french_full, french_desc, iso639_2)
        end
        @@loaded = true
      end

      # Map every '|'-separated name in descriptions to code in table.
      def self.register_descriptions(table, descriptions, code)
        return if descriptions.nil?
        descriptions.strip.downcase.split('|').each do |name|
          name = name.strip
          table[name] = code unless name.empty?
        end
      end
      private_class_method :register_descriptions
    end
  end
end
@@ -0,0 +1,504 @@
1
+ aar,aa,afar,afar
2
+ abk,ab,abkhazian,abkhaze
3
+ ace,,achinese,aceh
4
+ ach,,acoli,acoli
5
+ ada,,adangme,adangme
6
+ ady,,adyghe,adyghé
7
+ afa,,afro-asiatic languages,afro-asiatique
8
+ afh,,afrihili,afrihili
9
+ afr,af,afrikaans,afrikaans
10
+ ain,,ainu,aïnou
11
+ aka,ak,akan,akan
12
+ akk,,akkadian,akkadien
13
+ alb,sq,albanian,albanais
14
+ ale,,aleut,aléoute
15
+ alg,,algonquian languages,algonquine
16
+ alt,,southern altai,altai du sud
17
+ amh,am,amharic,amharique
18
+ ang,,old english,anglo-saxon
19
+ anp,,angika,angika
20
+ apa,,apache languages,apache
21
+ ara,ar,arabic,arabe
22
+ arc,,official aramaic|imperial aramaic,araméen d'empire
23
+ arg,an,aragonese,aragonais
24
+ arm,hy,armenian,arménien
25
+ arn,,mapudungun,mapudungun|mapuce
26
+ arp,,arapaho,arapaho
27
+ art,,artificial languages,artificielle
28
+ arw,,arawak,arawak
29
+ asm,as,assamese,assamais
30
+ ast,,asturian|leonese,asturien|léonais
31
+ ath,,athapascan languages,athapascane
32
+ aus,,australian languages,australien
33
+ ava,av,avaric,avar
34
+ ave,ae,avestan,avestique
35
+ awa,,awadhi,awadhi
36
+ aym,ay,aymara,aymara
37
+ aze,az,azerbaijani,azéri
38
+ bad,,banda languages,banda
39
+ bai,,bamileke languages,bamiléké
40
+ bak,ba,bashkir,bachkir
41
+ bal,,baluchi,baloutchi
42
+ bam,bm,bambara,bambara
43
+ ban,,balinese,balinais
44
+ baq,eu,basque,basque
45
+ bas,,basa,basa
46
+ bat,,baltic languages,balte
47
+ bej,,beja,bedja
48
+ bel,be,belarusian,biélorusse
49
+ bem,,bemba,bemba
50
+ ben,bn,bengali,bengali
51
+ ber,,berber languages,berbère
52
+ bho,,bhojpuri,bhojpuri
53
+ bih,bh,bihari languages,langues biharis
54
+ bik,,bikol,bikol
55
+ bin,,bini,bini
56
+ bis,bi,bislama,bichlamar
57
+ bla,,siksika,blackfoot
58
+ bnt,,bantu languages,bantou
59
+ tib,bo,tibetan,tibétain
60
+ bos,bs,bosnian,bosniaque
61
+ bra,,braj,braj
62
+ bre,br,breton,breton
63
+ btk,,batak languages,batak
64
+ bua,,buriat,bouriate
65
+ bug,,buginese,bugi
66
+ bul,bg,bulgarian,bulgare
67
+ bur,my,burmese,birman
68
+ byn,,blin,blin
69
+ cad,,caddo,caddo
70
+ cai,,central american indian languages,amérindiennes de l'amérique centrale
71
+ car,,galibi carib,karib|carib
72
+ cat,ca,catalan,catalan
73
+ cau,,caucasian languages,caucasien
74
+ ceb,,cebuano,cebuano
75
+ cel,,celtic languages,celtique
76
+ cze,cs,czech,tchèque
77
+ cha,ch,chamorro,chamorro
78
+ chb,,chibcha,chibcha
79
+ che,ce,chechen,tchétchène
80
+ chg,,chagatai,djaghataï
81
+ chi,zh,chinese,chinois
82
+ chk,,chuukese,chuuk
83
+ chm,,mari,mari
84
+ chn,,chinook,chinook
85
+ cho,,choctaw,choctaw
86
+ chp,,chipewyan suline,chipewyan
87
+ chr,,cherokee,cherokee
88
+ chu,cu,church slavic|old slavonic|church slavonic|old bulgarian|old church slavonic,slavon d'église|vieux slave|slavon liturgique|vieux bulgare
89
+ chv,cv,chuvash,tchouvache
90
+ chy,,cheyenne,cheyenne
91
+ cmc,,chamic languages,chame
92
+ cop,,coptic,copte
93
+ cor,kw,cornish,cornique
94
+ cos,co,corsican,corse
95
+ cpe,,creole,créole
96
+ cpf,,creole,créole
97
+ cpp,,creole,créole
98
+ cre,cr,cree,cree
99
+ crh,,crimean tatar|crimean turkish,tatar de crimée
100
+ crp,,creoles and pidgins,créoles et pidgins
101
+ csb,,kashubian,kachoube
102
+ cus,,cushitic languages,couchitique
103
+ wel,cy,welsh,gallois
104
+ cze,cs,czech,tchèque
105
+ dak,,dakota,dakota
106
+ dan,da,danish,danois
107
+ dar,,dargwa,dargwa
108
+ day,,land dayak languages,dayak
109
+ del,,delaware,delaware
110
+ den,,slave,esclave
111
+ ger,de,german,allemand
112
+ dgr,,dogrib,dogrib
113
+ din,,dinka,dinka
114
+ div,dv,divehi|maldivian,maldivien
115
+ doi,,dogri,dogri
116
+ dra,,dravidian languages,dravidien
117
+ dsb,,lower sorbian,bas-sorabe
118
+ dua,,duala,douala
119
+ dum,,middle dutch,néerlandais moyen
120
+ dut,nl,dutch,néerlandais
121
+ dyu,,dyula,dioula
122
+ dzo,dz,dzongkha,dzongkha
123
+ efi,,efik,efik
124
+ egy,,egyptian,égyptien
125
+ eka,,ekajuk,ekajuk
126
+ gre,el,greek,grec
127
+ elx,,elamite,élamite
128
+ eng,en,english,anglais
129
+ enm,,middle english,anglais moyen
130
+ epo,eo,esperanto,espéranto
131
+ est,et,estonian,estonien
132
+ baq,eu,basque,basque
133
+ ewe,ee,ewe,éwé
134
+ ewo,,ewondo,éwondo
135
+ fan,,fang,fang
136
+ fao,fo,faroese,féroïen
137
+ per,fa,persian,persan
138
+ fat,,fanti,fanti
139
+ fij,fj,fijian,fidjien
140
+ fil,,filipino,filipino
141
+ fin,fi,finnish,finnois
142
+ fiu,,finno-ugrian languages,finno-ougrien
143
+ fon,,fon,fon
144
+ fre,fr,french,français
145
+ frm,,middle french,français moyen
146
+ fro,,old french,français ancien
147
+ frr,,northern frisian,frison septentrional
148
+ frs,,eastern frisian,frison oriental
149
+ fry,fy,western frisian,frison occidental
150
+ ful,ff,fulah,peul
151
+ fur,,friulian,frioulan
152
+ gaa,,ga,ga
153
+ gay,,gayo,gayo
154
+ gba,,gbaya,gbaya
155
+ gem,,germanic languages,germanique
156
+ geo,ka,georgian,géorgien
157
+ ger,de,german,allemand
158
+ gez,,geez,guèze
159
+ gil,,gilbertese,kiribati
160
+ gla,gd,gaelic|scottish gaelic,gaélique|gaélique écossais
161
+ gle,ga,irish,irlandais
162
+ glg,gl,galician,galicien
163
+ glv,gv,manx,manx
164
+ gmh,,middle high german,moyen haut allemand
165
+ goh,,old high german,vieux haut allemand
166
+ gon,,gondi,gond
167
+ gor,,gorontalo,gorontalo
168
+ got,,gothic,gothique
169
+ grb,,grebo,grebo
170
+ grc,,ancient greek,grec ancien
171
+ gre,el,greek,grec
172
+ grn,gn,guarani,guarani
173
+ gsw,,swiss german|alsatian,suisse alémanique|alsacien
174
+ guj,gu,gujarati,goudjrati
175
+ gwi,,gwich'in,gwich'in
176
+ hai,,haida,haida
177
+ hat,ht,haitian creole,haïtien haïtien
178
+ hau,ha,hausa,haoussa
179
+ haw,,hawaiian,hawaïen
180
+ heb,he,hebrew,hébreu
181
+ her,hz,herero,herero
182
+ hil,,hiligaynon,hiligaynon
183
+ him,,himachali languages pahari languages,langues himachalis paharis occidentales
184
+ hin,hi,hindi,hindi
185
+ hit,,hittite,hittite
186
+ hmn,,hmong,hmong
187
+ hmo,ho,hiri motu,hiri motu
188
+ hrv,hr,croatian,croate
189
+ hsb,,upper sorbian,haut-sorabe
190
+ hun,hu,hungarian,hongrois
191
+ hup,,hupa,hupa
192
+ arm,hy,armenian,arménien
193
+ iba,,iban,iban
194
+ ibo,ig,igbo,igbo
195
+ ice,is,icelandic,islandais
196
+ ido,io,ido,ido
197
+ iii,ii,sichuan yi,yi de sichuan
198
+ ijo,,ijo languages,ijo
199
+ iku,iu,inuktitut,inuktitut
200
+ ile,ie,interlingue,interlingue
201
+ ilo,,iloko,ilocano
202
+ ina,ia,interlingua,interlingua
203
+ inc,,indic languages,indo-aryen
204
+ ind,id,indonesian,indonésien
205
+ ine,,indo-european languages,indo-européen
206
+ inh,,ingush,ingouche
207
+ ipk,ik,inupiaq,inupiaq
208
+ ira,,iranian languages,iranien
209
+ iro,,iroquoian languages,iroquoise
210
+ ice,is,icelandic,islandais
211
+ ita,it,italian,italien
212
+ jav,jv,javanese,javanais
213
+ jbo,,lojban,lojban
214
+ jpn,ja,japanese,japonais
215
+ jpr,,judeo-persian,judéo-persan
216
+ jrb,,judeo-arabic,judéo-arabe
217
+ kaa,,kara-kalpak,karakalpak
218
+ kab,,kabyle,kabyle
219
+ kac,,kachin,kachin
220
+ kal,kl,kalaallisut,groenlandais
221
+ kam,,kamba,kamba
222
+ kan,kn,kannada,kannada
223
+ kar,,karen languages,karen
224
+ kas,ks,kashmiri,kashmiri
225
+ geo,ka,georgian,géorgien
226
+ kau,kr,kanuri,kanouri
227
+ kaw,,kawi,kawi
228
+ kaz,kk,kazakh,kazakh
229
+ kbd,,kabardian,kabardien
230
+ kha,,khasi,khasi
231
+ khi,,khoisan languages,khoïsan
232
+ khm,km,central khmer,khmer central
233
+ kho,,khotanese,khotanais
234
+ kik,ki,kikuyu,kikuyu
235
+ kin,rw,kinyarwanda,rwanda
236
+ kir,ky,kirghiz,kirghiz
237
+ kmb,,kimbundu,kimbundu
238
+ kok,,konkani,konkani
239
+ kom,kv,komi,kom
240
+ kon,kg,kongo,kongo
241
+ kor,ko,korean,coréen
242
+ kos,,kosraean,kosrae
243
+ kpe,,kpelle,kpellé
244
+ krc,,karachay-balkar,karatchai balkar
245
+ krl,,karelian,carélien
246
+ kro,,kru languages,krou
247
+ kru,,kurukh,kurukh
248
+ kua,kj,kuanyama,kuanyama
249
+ kum,,kumyk,koumyk
250
+ kur,ku,kurdish,kurde
251
+ kut,,kutenai,kutenai
252
+ lad,,ladino,judéo-espagnol
253
+ lah,,lahnda,lahnda
254
+ lam,,lamba,lamba
255
+ lao,lo,lao,lao
256
+ lat,la,latin,latin
257
+ lav,lv,latvian,letton
258
+ lez,,lezghian,lezghien
259
+ lim,li,limburgan|limburgish,limbourgeois
260
+ lin,ln,lingala,lingala
261
+ lit,lt,lithuanian,lituanien
262
+ lol,,mongo,mongo
263
+ loz,,lozi,lozi
264
+ ltz,lb,luxembourgish,luxembourgeois
265
+ lua,,luba-lulua,luba-lulua
266
+ lub,lu,luba-katanga,luba-katanga
267
+ lug,lg,ganda,ganda
268
+ lui,,luiseno,luiseno
269
+ lun,,lunda,lunda
270
+ luo,,luo,luo
271
+ lus,,lushai,lushai
272
+ mac,mk,macedonian,macédonien
273
+ mad,,madurese,madourais
274
+ mag,,magahi,magahi
275
+ mah,mh,marshallese,marshall
276
+ mai,,maithili,maithili
277
+ mak,,makasar,makassar
278
+ mal,ml,malayalam,malayalam
279
+ man,,mandingo,mandingue
280
+ mao,mi,maori,maori
281
+ map,,austronesian languages,austronésien
282
+ mar,mr,marathi,marathe
283
+ mas,,masai,massaï
284
+ may,ms,malay,malais
285
+ mdf,,moksha,moksa
286
+ mdr,,mandar,mandar
287
+ men,,mende,mendé
288
+ mga,,middle irish,irlandais moyen
289
+ mic,,mi'kmaq,mi'kmaq
290
+ min,,minangkabau,minangkabau
291
+ mis,,uncoded languages,langues non codées
292
+ mac,mk,macedonian,macédonien
293
+ mkh,,mon-khmer languages,môn-khmer
294
+ mlg,mg,malagasy,malgache
295
+ mlt,mt,maltese,maltais
296
+ mnc,,manchu,mandchou
297
+ mni,,manipuri,manipuri
298
+ mno,,manobo languages,manobo
299
+ moh,,mohawk,mohawk
300
+ mon,mn,mongolian,mongol
301
+ mos,,mossi,moré
302
+ mao,mi,maori,maori
303
+ may,ms,malay,malais
304
+ mul,,multiple languages,multilingue
305
+ mun,,munda languages,mounda
306
+ mus,,creek,muskogee
307
+ mwl,,mirandese,mirandais
308
+ mwr,,marwari,marvari
309
+ bur,my,burmese,birman
310
+ myn,,mayan languages,maya
311
+ myv,,erzya,erza
312
+ nah,,nahuatl languages,nahuatl
313
+ nai,,north american indian languages,nord-amérindien
314
+ nap,,neapolitan,napolitain
315
+ nau,na,nauru,nauruan
316
+ nav,nv,navajo,navaho
317
+ nbl,nr,south ndebele,ndébélé du sud
318
+ nde,nd,north ndebele,ndébélé du nord
319
+ ndo,ng,ndonga,ndonga
320
+ nds,,low german|low saxon,bas allemand|bas saxon
321
+ nep,ne,nepali,népalais
322
+ new,,nepal bhasa,nepal bhasa
323
+ nia,,nias,nias
324
+ nic,,niger-kordofanian languages,nigéro-kordofanien
325
+ niu,,niuean,niué
326
+ dut,nl,dutch,néerlandais
327
+ nno,nn,nynorsk norwegian,norvégien nynorsk
328
+ nob,nb,bokmål norwegian,norvégien bokmål
329
+ nog,,nogai,nogaï
330
+ non,,old norse,vieux norrois
331
+ nor,no,norwegian,norvégien
332
+ nqo,,n'ko,n'ko
333
+ nso,,pedi|northern sotho,pedi|sotho du nord
334
+ nub,,nubian languages,nubien
335
+ nwc,,classical newari newari nepal bhasa,newari classique
336
+ nya,ny,chichewa|nyanja,chichewa|nyanja
337
+ nym,,nyamwezi,nyamwezi
338
+ nyn,,nyankole,nyankolé
339
+ nyo,,nyoro,nyoro
340
+ nzi,,nzima,nzema
341
+ oci,oc,occitan,occitan
342
+ oji,oj,ojibwa,ojibwa
343
+ ori,or,oriya,oriya
344
+ orm,om,oromo,galla
345
+ osa,,osage,osage
346
+ oss,os,ossetian,ossète
347
+ ota,,ottoman turkish,turc ottoman
348
+ oto,,otomian languages,otomi
349
+ paa,,papuan languages,papoue
350
+ pag,,pangasinan,pangasinan
351
+ pal,,pahlavi,pahlavi
352
+ pam,,pampanga,pampangan
353
+ pan,pa,panjabi,pendjabi
354
+ pap,,papiamento,papiamento
355
+ pau,,palauan,palau
356
+ peo,,old persian,vieux perse
357
+ per,fa,farsi,persan
358
+ phi,,philippine languages,philippine
359
+ phn,,phoenician,phénicien
360
+ pli,pi,pali,pali
361
+ pny,,pinyin,pinyin
362
+ pol,pl,polish,polonais
363
+ pon,,pohnpeian,pohnpei
364
+ por,pt,portuguese,portugais
365
+ pra,,prakrit languages,prâkrit
366
+ pro,,old provençal occitan,provençal ancien ancien
367
+ pus,ps,pushto,pachto
368
+ qaa-qtz,,reserved for local use,réservée à l'usage local
369
+ que,qu,quechua,quechua
370
+ raj,,rajasthani,rajasthani
371
+ rap,,rapanui,rapanui
372
+ rar,,rarotongan islands maori,rarotonga des îles cook
373
+ roa,,romance languages,romane
374
+ roh,rm,romansh,romanche
375
+ rom,,romany,tsigane
376
+ rum,ro,romanian|moldovan,roumain
377
+ rum,ro,romanian|moldovan,roumain
378
+ run,rn,rundi,rundi
379
+ rup,,aromanian|macedo-romanian,aroumain-roumain
380
+ rus,ru,russian,russe
381
+ sad,,sandawe,sandawe
382
+ sag,sg,sango,sango
383
+ sah,,yakut,iakoute
384
+ sai,,south american indian languages,sud-amérindien
385
+ sal,,salishan languages,salishen
386
+ sam,,samaritan aramaic,samaritain
387
+ san,sa,sanskrit,sanskrit
388
+ sas,,sasak,sasak
389
+ sat,,santali,santal
390
+ scn,,sicilian,sicilien
391
+ sco,,scots,écossais
392
+ sel,,selkup,selkoupe
393
+ sem,,semitic languages,sémitique
394
+ sga,,old irish,irlandais ancien
395
+ sgn,,sign languages,langues des signes
396
+ shn,,shan,chan
397
+ sid,,sidamo,sidamo
398
+ sin,si,sinhala,singhalais
399
+ sio,,siouan languages,sioux
400
+ sit,,sino-tibetan languages,sino-tibétaine
401
+ sla,,slavic languages,slave
402
+ slo,sk,slovak,slovaque
403
+ slv,sl,slovenian,slovène
404
+ sma,,southern sami,sami du sud
405
+ sme,se,northern sami,sami du nord
406
+ smi,,sami languages,same
407
+ smj,,lule sami,sami de lule
408
+ smn,,inari sami,sami d'inari
409
+ smo,sm,samoan,samoan
410
+ sms,,skolt sami,sami skolt
411
+ sna,sn,shona,shona
412
+ snd,sd,sindhi,sindhi
413
+ snk,,soninke,soninké
414
+ sog,,sogdian,sogdien
415
+ som,so,somali,somali
416
+ son,,songhai languages,songhai
417
+ sot,st,southern sotho,sotho du sud
418
+ spa,es,spanish,espagnol
419
+ alb,sq,albanian,albanais
420
+ srd,sc,sardinian,sarde
421
+ srn,,sranan tongo,sranan tongo
422
+ srp,sr,serbian,serbe
423
+ srr,,serer,sérère
424
+ ssa,,nilo-saharan languages,nilo-saharien
425
+ ssw,ss,swati,swati
426
+ suk,,sukuma,sukuma
427
+ sun,su,sundanese,soundanais
428
+ sus,,susu,soussou
429
+ sux,,sumerian,sumérien
430
+ swa,sw,swahili,swahili
431
+ swe,sv,swedish,suédois
432
+ syc,,classical syriac,syriaque classique
433
+ syr,,syriac,syriaque
434
+ tah,ty,tahitian,tahitien
435
+ tai,,tai languages,tai
436
+ tam,ta,tamil,tamoul
437
+ tat,tt,tatar,tatar
438
+ tel,te,telugu,télougou
439
+ tem,,timne,temne
440
+ ter,,tereno,tereno
441
+ tet,,tetum,tetum
442
+ tgk,tg,tajik,tadjik
443
+ tgl,tl,tagalog,tagalog
444
+ tha,th,thai,thaï
445
+ tib,bo,tibetan,tibétain
446
+ tig,,tigre,tigré
447
+ tir,ti,tigrinya,tigrigna
448
+ tiv,,tiv,tiv
449
+ tkl,,tokelau,tokelau
450
+ tlh,,klingon-hol,klingon
451
+ tli,,tlingit,tlingit
452
+ tmh,,tamashek,tamacheq
453
+ tog,,tonga,tonga
454
+ ton,to,tonga,tongan
455
+ tpi,,tok pisin,tok pisin
456
+ tsi,,tsimshian,tsimshian
457
+ tsn,tn,tswana,tswana
458
+ tso,ts,tsonga,tsonga
459
+ tuk,tk,turkmen,turkmène
460
+ tum,,tumbuka,tumbuka
461
+ tup,,tupi languages,tupi
462
+ tur,tr,turkish,turc
463
+ tut,,altaic languages,altaïque
464
+ tvl,,tuvalu,tuvalu
465
+ twi,tw,twi,twi
466
+ tyv,,tuvinian,touva
467
+ udm,,udmurt,oudmourte
468
+ uga,,ugaritic,ougaritique
469
+ uig,ug,uighur,ouïgour
470
+ ukr,uk,ukrainian,ukrainien
471
+ umb,,umbundu,umbundu
472
+ und,,undetermined,indéterminée
473
+ urd,ur,urdu,ourdou
474
+ uzb,uz,uzbek,ouzbek
475
+ vai,,vai,vaï
476
+ ven,ve,venda,venda
477
+ vie,vi,vietnamese,vietnamien
478
+ vol,vo,volapük,volapük
479
+ vot,,votic,vote
480
+ wak,,wakashan languages,wakashane
481
+ wal,,wolaitta,wolaitta
482
+ war,,waray,waray
483
+ was,,washo,washo
484
+ wel,cy,welsh,gallois
485
+ wen,,sorbian languages,sorabe
486
+ wln,wa,walloon,wallon
487
+ wol,wo,wolof,wolof
488
+ xal,,kalmyk,kalmouk
489
+ xho,xh,xhosa,xhosa
490
+ yao,,yao,yao
491
+ yap,,yapese,yapois
492
+ yid,yi,yiddish,yiddish
493
+ yor,yo,yoruba,yoruba
494
+ ypk,,yupik languages,yupik
495
+ zap,,zapotec,zapotèque
496
+ zbl,,blissymbols|bliss,symboles bliss
497
+ zen,,zenaga,zenaga
498
+ zha,za,zhuang,zhuang
499
+ chi,zh,chinese,chinois
500
+ znd,,zande languages,zandé
501
+ zul,zu,zulu,zoulou
502
+ zun,,zuni,zuni
503
+ zxx,,no linguistic content applicable,pas de contenu linguistique applicable
504
+ zza,,zaza|dimli|kirmanjki,zaza|dimli|kirmanjki