treat 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,26 @@
1
+ module Treat
2
+ module Registrable
3
+ # Registers a token in the @token_registry
4
+ # hash in the root node.
5
+ def register_token(token)
6
+ if is_root?
7
+ @token_registry ||= {value: {}, id: {}}
8
+ @token_registry[:id][token.id] = token
9
+ @token_registry[:value][token.value] ||= []
10
+ @token_registry[:value][token.value] << token
11
+ else
12
+ @parent.register_token(token)
13
+ end
14
+ end
15
+ # Find the token registry, which is
16
+ # always in the root node.
17
+ def token_registry
18
+ if has_parent?
19
+ @parent.token_registry
20
+ else
21
+ @token_registry ||= {value: {}, id: {}}
22
+ @token_registry
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,10 @@
1
+ module Treat
2
+ module Resources
3
+ require 'treat/resources/delegates'
4
+ require 'treat/resources/dependencies'
5
+ require 'treat/resources/edges'
6
+ require 'treat/resources/languages'
7
+ require 'treat/resources/tags'
8
+ require 'treat/resources/categories'
9
+ end
10
+ end
@@ -0,0 +1,18 @@
1
+ module Treat
2
+ module Resources
3
+ class Categories
4
+ List = [
5
+ :adjective, :adverb, :noun, :verb, :interjection,
6
+ :clitic, :coverb, :conjunction, :determiner, :particle,
7
+ :preposition, :pronoun, :number, :symbol, :punctuation,
8
+ :complementizer
9
+ ]
10
+ wttc = {}
11
+ Treat::Resources::Tags::AlignedWordTags.each_slice(2) do |desc, tags|
12
+ desc = desc.gsub(',', ' ,').split(' ')[0].downcase
13
+ tags.each { |tag| wttc[tag] = desc.intern }
14
+ end
15
+ WordTagToCategory = wttc
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,96 @@
1
+ module Treat
2
+ module Resources
3
+ module Delegates # One class per language; each constant maps a task name to a list of delegate symbols.
4
+ class English # NOTE(review): symbols appear to match file names under lib/treat/<group>/<task>/ -- confirm.
5
+ Extractors = {
6
+ time: [:chronic],
7
+ topics: [:reuters],
8
+ topic_words: [:lda],
9
+ key_sentences: [:topics_frequency]
10
+ }
11
+ Processors = {
12
+ chunkers: [:txt],
13
+ parsers: [:enju, :stanford],
14
+ segmenters: [:tactful, :punkt, :stanford],
15
+ tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
16
+ }
17
+ Lexicalizers = {
18
+ category: [:from_tag],
19
+ linkages: [:naive],
20
+ synsets: [:wordnet, :rita_wn],
21
+ tag: [:brill, :lingua, :stanford]
22
+ }
23
+ Inflectors = {
24
+ conjugators: [:linguistics],
25
+ declensors: [:linguistics, :english], # NOTE(review): the file list has declensors/en.rb, not english.rb -- confirm the name resolves.
26
+ lemmatizers: [:e_lemma],
27
+ stemmers: [:porter_c, :porter, :uea],
28
+ ordinal_words: [:linguistics],
29
+ cardinal_words: [:linguistics]
30
+ }
31
+ end
32
+ class German
33
+ Extractors = {}
34
+ Inflectors = {}
35
+ Lexicalizers = {
36
+ tag: [:stanford]
37
+ }
38
+ Processors = {
39
+ chunkers: [:txt],
40
+ parsers: [:stanford],
41
+ segmenters: [:tactful, :punkt, :stanford],
42
+ tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
43
+ }
44
+ end
45
+ class French
46
+ Extractors = {}
47
+ Inflectors = {}
48
+ Lexicalizers = {
49
+ tag: [:stanford]
50
+ }
51
+ Processors = {
52
+ chunkers: [:txt],
53
+ parsers: [:stanford],
54
+ segmenters: [:tactful, :punkt, :stanford],
55
+ tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
56
+ }
57
+ end
58
+ class Italian
59
+ Extractors = {}
60
+ Inflectors = {}
61
+ Lexicalizers = {}
62
+ Processors = {
63
+ chunkers: [:txt],
64
+ segmenters: [:tactful, :punkt, :stanford],
65
+ tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
66
+ }
67
+ end
68
+ class Arabic
69
+ Extractors = {}
70
+ Inflectors = {}
71
+ Lexicalizers = {
72
+ tag: [:stanford]
73
+ }
74
+ Processors = {
75
+ parsers: [:stanford]
76
+ }
77
+ end
78
+ class Chinese
79
+ Extractors = {}
80
+ Inflectors = {}
81
+ Lexicalizers = {
82
+ tag: [:stanford]
83
+ }
84
+ Processors = {}
85
+ end
86
+ class Xinhua # NOTE(review): Xinhua is not a language name; presumably selects a Stanford parser model -- confirm.
87
+ Extractors = {}
88
+ Inflectors = {}
89
+ Lexicalizers = {}
90
+ Processors = {
91
+ parsers: [:stanford]
92
+ }
93
+ end
94
+ end
95
+ end
96
+ end
File without changes
@@ -0,0 +1,8 @@
1
+ # http://nlp.stanford.edu/software/dependencies_manual.pdf
2
+
3
+ =begin
4
+ ENJU
5
+
6
+ pred: noun_arg0, noun_arg1, noun_arg2, noun_arg12, it_arg1, there_arg0, quote_arg2, quote_arg12, quote_arg23, quote_arg123, poss_arg2, poss_arg12, aux_arg12, aux_mod_arg12, verb_arg1, verb_arg12, verb_arg123, verb_arg1234, verb_mod_arg1, verb_mod_arg12, verb_mod_arg123, verb_mod_arg1234, adj_arg1, adj_arg12, adj_mod_arg1, adj_mod_arg12, conj_arg1, conj_arg12, conj_arg123, coord_arg12, det_arg1, prep_arg12, prep_arg123, prep_mod_arg12, prep_mod_arg123, lgs_arg2, dtv_arg2, punct_arg1, app_arg12, lparen_arg123, rparen_arg0, comp_arg1, comp_arg12, comp_mod_arg1, relative_arg1, relative_arg12
7
+
8
+ =end
@@ -0,0 +1,23 @@
1
+ module Treat
2
+ module Resources
3
+ module Format
4
+
5
+ class XML
6
+ require 'nokogiri'
7
+ def self.validate(document_path, schema_path, root_element)
8
+ schema = Nokogiri::XML::Schema(File.read(schema_path))
9
+ document = Nokogiri::XML(File.read(document_path))
10
+ schema.validate(document.xpath("//#{root_element}").to_s)
11
+ end
12
+ validate('input.xml', 'schema.xdf', 'container').each do |error|
13
+ puts error.message
14
+ end
15
+ end
16
+
17
+ class HTML < XML
18
+
19
+ end
20
+
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,86 @@
1
+ module Treat
2
+ module Resources
3
+ # Dictionnary of ISO-639-1, ISO-639-2 language codes,
4
+ # as well as their full text description in both
5
+ # English and French.
6
+ module Languages
7
+ ISO639_1 = 1
8
+ ISO639_2 = 2
9
+ # Describe a language code (ISO-639-1 or ISO-639-2)
10
+ # or its full text description in full French or English.
11
+ def self.describe(lang, desc_lang = :en)
12
+ raise "Must provide a non-nil language identifier to describe." if lang.nil?
13
+ lang = find(lang).to_s
14
+ if [:en, :eng, :english, :anglais].include?(desc_lang)
15
+ l = @@english_full.key(lang)
16
+ elsif [:fr, :fra, :french, :french].include?(desc_lang)
17
+ l = @@french_full.key(lang)
18
+ else
19
+ raise Treat::Exception,
20
+ "Unknown language to describe: #{desc_lang}."
21
+ end
22
+ not_found(lang) if l.nil?
23
+ l.intern
24
+ end
25
+ # Raise an error message when a language code
26
+ # or description is not found and suggest
27
+ # possible misspellings.
28
+ def self.not_found(lang)
29
+ msg = "Language '#{lang}' does not exist."
30
+ all = @@iso639_2.keys + @@iso639_1.keys +
31
+ @@english_full.keys + @@french_full.keys
32
+ msg += did_you_mean?(all, lang)
33
+ raise Treat::Exception, msg
34
+ end
35
+ # Find a language by ISO-639-1 or ISO-639-2 code
36
+ # or full name (in English or French) and return
37
+ # the ISO-639-1 or ISO-639-2 language code as a
38
+ # lowercase identifier.
39
+ def self.find(lang, rc = ISO639_2)
40
+ raise "Must provide a non-nil language identifier to describe." if lang.nil?
41
+ get_languages
42
+ lang = lang.to_s.downcase
43
+ if @@iso639_1.has_key?(lang)
44
+ return :"#{lang}" if rc == ISO639_1
45
+ return :"#{@@iso639_1[lang]}" if rc == ISO639_2
46
+ elsif @@iso639_2.has_key?(lang)
47
+ return :"#{lang}" if rc == ISO639_2
48
+ return :"#{@@iso639_2[lang]}" if rc == ISO639_1
49
+ elsif @@english_full.has_key?(lang)
50
+ return :"#{@@english_full[lang]}" if rc == ISO639_2
51
+ return :"#{@@iso639_2[@@english_full[lang]]}" if rc == ISO639_1
52
+ elsif @@french_full.has_key?(lang)
53
+ return :"#{@@french_full[lang]}" if rc == ISO639_2
54
+ return :"#{@@iso639_1[@@french_full[lang]]}" if rc == ISO639_2
55
+ else
56
+ not_found(lang)
57
+ end
58
+ end
59
+ @@loaded = false
60
+ # Get the languages from the dictionary.
61
+ def self.get_languages
62
+ return if @@loaded
63
+ @@iso639_1 = {}; @@iso639_2 = {};
64
+ @@english_full = {}; @@french_full = {}
65
+ languages = IO.readlines(Treat.lib + '/treat/resources/languages.txt')
66
+ languages.each do |language|
67
+ iso639_2, iso639_1, english_desc, french_desc =
68
+ language.split(',')
69
+ @@iso639_1[iso639_1] = iso639_2
70
+ @@iso639_2[iso639_2] = iso639_1
71
+ unless english_desc.nil?
72
+ english_desc.strip.downcase.split('|').each do |l|
73
+ @@english_full[l.downcase.strip] = iso639_2
74
+ end
75
+ end
76
+ unless french_desc.nil?
77
+ french_desc.strip.downcase.split('|').each do |l|
78
+ @@french_full[l.downcase.strip] = iso639_2
79
+ end
80
+ end
81
+ end
82
+ @@loaded = true
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,504 @@
1
+ aar,aa,afar,afar
2
+ abk,ab,abkhazian,abkhaze
3
+ ace,,achinese,aceh
4
+ ach,,acoli,acoli
5
+ ada,,adangme,adangme
6
+ ady,,adyghe,adyghé
7
+ afa,,afro-asiatic languages,afro-asiatique
8
+ afh,,afrihili,afrihili
9
+ afr,af,afrikaans,afrikaans
10
+ ain,,ainu,aïnou
11
+ aka,ak,akan,akan
12
+ akk,,akkadian,akkadien
13
+ alb,sq,albanian,albanais
14
+ ale,,aleut,aléoute
15
+ alg,,algonquian languages,algonquine
16
+ alt,,southern altai,altai du sud
17
+ amh,am,amharic,amharique
18
+ ang,,old english,anglo-saxon
19
+ anp,,angika,angika
20
+ apa,,apache languages,apache
21
+ ara,ar,arabic,arabe
22
+ arc,,official aramaic|imperial aramaic,araméen d'empire
23
+ arg,an,aragonese,aragonais
24
+ arm,hy,armenian,arménien
25
+ arn,,mapudungun,mapudungun|mapuce
26
+ arp,,arapaho,arapaho
27
+ art,,artificial languages,artificielle
28
+ arw,,arawak,arawak
29
+ asm,as,assamese,assamais
30
+ ast,,asturian|leonese,asturien|léonais
31
+ ath,,athapascan languages,athapascane
32
+ aus,,australian languages,australien
33
+ ava,av,avaric,avar
34
+ ave,ae,avestan,avestique
35
+ awa,,awadhi,awadhi
36
+ aym,ay,aymara,aymara
37
+ aze,az,azerbaijani,azéri
38
+ bad,,banda languages,banda
39
+ bai,,bamileke languages,bamiléké
40
+ bak,ba,bashkir,bachkir
41
+ bal,,baluchi,baloutchi
42
+ bam,bm,bambara,bambara
43
+ ban,,balinese,balinais
44
+ baq,eu,basque,basque
45
+ bas,,basa,basa
46
+ bat,,baltic languages,balte
47
+ bej,,beja,bedja
48
+ bel,be,belarusian,biélorusse
49
+ bem,,bemba,bemba
50
+ ben,bn,bengali,bengali
51
+ ber,,berber languages,berbère
52
+ bho,,bhojpuri,bhojpuri
53
+ bih,bh,bihari languages,langues biharis
54
+ bik,,bikol,bikol
55
+ bin,,bini,bini
56
+ bis,bi,bislama,bichlamar
57
+ bla,,siksika,blackfoot
58
+ bnt,,bantu languages,bantou
59
+ tib,bo,tibetan,tibétain
60
+ bos,bs,bosnian,bosniaque
61
+ bra,,braj,braj
62
+ bre,br,breton,breton
63
+ btk,,batak languages,batak
64
+ bua,,buriat,bouriate
65
+ bug,,buginese,bugi
66
+ bul,bg,bulgarian,bulgare
67
+ bur,my,burmese,birman
68
+ byn,,blin,blin
69
+ cad,,caddo,caddo
70
+ cai,,central american indian languages,amérindiennes de l'amérique centrale
71
+ car,,galibi carib,karib|carib
72
+ cat,ca,catalan,catalan
73
+ cau,,caucasian languages,caucasien
74
+ ceb,,cebuano,cebuano
75
+ cel,,celtic languages,celtique
76
+ cze,cs,czech,tchèque
77
+ cha,ch,chamorro,chamorro
78
+ chb,,chibcha,chibcha
79
+ che,ce,chechen,tchétchène
80
+ chg,,chagatai,djaghataï
81
+ chi,zh,chinese,chinois
82
+ chk,,chuukese,chuuk
83
+ chm,,mari,mari
84
+ chn,,chinook,chinook
85
+ cho,,choctaw,choctaw
86
+ chp,,chipewyan suline,chipewyan
87
+ chr,,cherokee,cherokee
88
+ chu,cu,church slavic|old slavonic|church slavonic|old bulgarian|old church slavonic,slavon d'église|vieux slave|slavon liturgique|vieux bulgare
89
+ chv,cv,chuvash,tchouvache
90
+ chy,,cheyenne,cheyenne
91
+ cmc,,chamic languages,chame
92
+ cop,,coptic,copte
93
+ cor,kw,cornish,cornique
94
+ cos,co,corsican,corse
95
+ cpe,,creole,créole
96
+ cpf,,creole,créole
97
+ cpp,,creole,créole
98
+ cre,cr,cree,cree
99
+ crh,,crimean tatar turkish,tatar de crimé
100
+ crp,,creoles and pidgins,créoles et pidgins
101
+ csb,,kashubian,kachoube
102
+ cus,,cushitic languages,couchitique
103
+ wel,cy,welsh,gallois
104
+ cze,cs,czech,tchèque
105
+ dak,,dakota,dakota
106
+ dan,da,danish,danois
107
+ dar,,dargwa,dargwa
108
+ day,,land dayak languages,dayak
109
+ del,,delaware,delaware
110
+ den,,slave,esclave
111
+ ger,de,german,allemand
112
+ dgr,,dogrib,dogrib
113
+ din,,dinka,dinka
114
+ div,dv,divehi|maldivian,maldivien
115
+ doi,,dogri,dogri
116
+ dra,,dravidian languages,dravidien
117
+ dsb,,lower sorbian,bas-sorabe
118
+ dua,,duala,douala
119
+ dum,,middle dutch,néerlandais moyen
120
+ dut,nl,dutch,néerlandais
121
+ dyu,,dyula,dioula
122
+ dzo,dz,dzongkha,dzongkha
123
+ efi,,efik,efik
124
+ egy,,egyptian,égyptien
125
+ eka,,ekajuk,ekajuk
126
+ gre,el,greek,grec
127
+ elx,,elamite,élamite
128
+ eng,en,english,anglais
129
+ enm,,middle english,anglais moyen
130
+ epo,eo,esperanto,espéranto
131
+ est,et,estonian,estonien
132
+ baq,eu,basque,basque
133
+ ewe,ee,ewe,éwé
134
+ ewo,,ewondo,éwondo
135
+ fan,,fang,fang
136
+ fao,fo,faroese,féroïen
137
+ per,fa,persian,persan
138
+ fat,,fanti,fanti
139
+ fij,fj,fijian,fidjien
140
+ fil,,filipino,filipino
141
+ fin,fi,finnish,finnois
142
+ fiu,,finno-ugrian languages,finno-ougrien
143
+ fon,,fon,fon
144
+ fre,fr,french,français
145
+ frm,,middle french,français moyen
146
+ fro,,old french,français ancien
147
+ frr,,northern frisian,frison septentrional
148
+ frs,,eastern frisian,frison oriental
149
+ fry,fy,western frisian,frison occidental
150
+ ful,ff,fulah,peul
151
+ fur,,friulian,frioulan
152
+ gaa,,ga,ga
153
+ gay,,gayo,gayo
154
+ gba,,gbaya,gbaya
155
+ gem,,germanic languages,germanique
156
+ geo,ka,georgian,géorgien
157
+ ger,de,german,allemand
158
+ gez,,geez,guèze
159
+ gil,,gilbertese,kiribati
160
+ gla,gd,gaelic gaelic,gaélique écossais
161
+ gle,ga,irish,irlandais
162
+ glg,gl,galician,galicien
163
+ glv,gv,manx,manx
164
+ gmh,,middle high german,moyen haut allemand
165
+ goh,,old high german,vieux haut allemand
166
+ gon,,gondi,gond
167
+ gor,,gorontalo,gorontalo
168
+ got,,gothic,gothique
169
+ grb,,grebo,grebo
170
+ grc,,ancient greek,grec ancien
171
+ gre,el,greek,grec
172
+ grn,gn,guarani,guarani
173
+ gsw,,swiss german|alsatian,suisse alémanique|alsacien
174
+ guj,gu,gujarati,goudjrati
175
+ gwi,,gwich'in,gwich'in
176
+ hai,,haida,haida
177
+ hat,ht,haitian creole,haïtien haïtien
178
+ hau,ha,hausa,haoussa
179
+ haw,,hawaiian,hawaïen
180
+ heb,he,hebrew,hébreu
181
+ her,hz,herero,herero
182
+ hil,,hiligaynon,hiligaynon
183
+ him,,himachali languages pahari languages,langues himachalis paharis occidentales
184
+ hin,hi,hindi,hindi
185
+ hit,,hittite,hittite
186
+ hmn,,hmong,hmong
187
+ hmo,ho,hiri motu,hiri motu
188
+ hrv,hr,croatian,croate
189
+ hsb,,upper sorbian,haut-sorabe
190
+ hun,hu,hungarian,hongrois
191
+ hup,,hupa,hupa
192
+ arm,hy,armenian,arménien
193
+ iba,,iban,iban
194
+ ibo,ig,igbo,igbo
195
+ ice,is,icelandic,islandais
196
+ ido,io,ido,ido
197
+ iii,ii,sichuan yi,yi de sichuan
198
+ ijo,,ijo languages,ijo
199
+ iku,iu,inuktitut,inuktitut
200
+ ile,ie,interlingue,interlingue
201
+ ilo,,iloko,ilocano
202
+ ina,ia,interlingua,interlingua
203
+ inc,,indic languages,indo-aryen
204
+ ind,id,indonesian,indonésien
205
+ ine,,indo-european languages,indo-européen
206
+ inh,,ingush,ingouche
207
+ ipk,ik,inupiaq,inupiaq
208
+ ira,,iranian languages,iranien
209
+ iro,,iroquoian languages,iroquoise
210
+ ice,is,icelandic,islandais
211
+ ita,it,italian,italien
212
+ jav,jv,javanese,javanais
213
+ jbo,,lojban,lojban
214
+ jpn,ja,japanese,japonais
215
+ jpr,,judeo-persian,judéo-persan
216
+ jrb,,judeo-arabic,judéo-arabe
217
+ kaa,,kara-kalpak,karakalpak
218
+ kab,,kabyle,kabyle
219
+ kac,,kachin,kachin
220
+ kal,kl,kalaallisut,groenlandais
221
+ kam,,kamba,kamba
222
+ kan,kn,kannada,kannada
223
+ kar,,karen languages,karen
224
+ kas,ks,kashmiri,kashmiri
225
+ geo,ka,georgian,géorgien
226
+ kau,kr,kanuri,kanouri
227
+ kaw,,kawi,kawi
228
+ kaz,kk,kazakh,kazakh
229
+ kbd,,kabardian,kabardien
230
+ kha,,khasi,khasi
231
+ khi,,khoisan languages,khoïsan
232
+ khm,km,central khmer,khmer central
233
+ kho,,khotanese,khotanais
234
+ kik,ki,kikuyu,kikuyu
235
+ kin,rw,kinyarwanda,rwanda
236
+ kir,ky,kirghiz,kirghiz
237
+ kmb,,kimbundu,kimbundu
238
+ kok,,konkani,konkani
239
+ kom,kv,komi,kom
240
+ kon,kg,kongo,kongo
241
+ kor,ko,korean,coréen
242
+ kos,,kosraean,kosrae
243
+ kpe,,kpelle,kpellé
244
+ krc,,karachay-balkar,karatchai balkar
245
+ krl,,karelian,carélien
246
+ kro,,kru languages,krou
247
+ kru,,kurukh,kurukh
248
+ kua,kj,kuanyama,kuanyama
249
+ kum,,kumyk,koumyk
250
+ kur,ku,kurdish,kurde
251
+ kut,,kutenai,kutenai
252
+ lad,,ladino,judéo-espagnol
253
+ lah,,lahnda,lahnda
254
+ lam,,lamba,lamba
255
+ lao,lo,lao,lao
256
+ lat,la,latin,latin
257
+ lav,lv,latvian,letton
258
+ lez,,lezghian,lezghien
259
+ lim,li,limburgan|limburgish,limbourgeois
260
+ lin,ln,lingala,lingala
261
+ lit,lt,lithuanian,lituanien
262
+ lol,,mongo,mongo
263
+ loz,,lozi,lozi
264
+ ltz,lb,luxembourgish,luxembourgeois
265
+ lua,,luba-lulua,luba-lulua
266
+ lub,lu,luba-katanga,luba-katanga
267
+ lug,lg,ganda,ganda
268
+ lui,,luiseno,luiseno
269
+ lun,,lunda,lunda
270
+ luo,,luo,luo
271
+ lus,,lushai,lushai
272
+ mac,mk,macedonian,macédonien
273
+ mad,,madurese,madourais
274
+ mag,,magahi,magahi
275
+ mah,mh,marshallese,marshall
276
+ mai,,maithili,maithili
277
+ mak,,makasar,makassar
278
+ mal,ml,malayalam,malayalam
279
+ man,,mandingo,mandingue
280
+ mao,mi,maori,maori
281
+ map,,austronesian languages,austronésien
282
+ mar,mr,marathi,marathe
283
+ mas,,masai,massaï
284
+ may,ms,malay,malais
285
+ mdf,,moksha,moksa
286
+ mdr,,mandar,mandar
287
+ men,,mende,mendé
288
+ mga,,middle irish,irlandais moyen
289
+ mic,,mi'kmaq,mi'kmaq
290
+ min,,minangkabau,minangkabau
291
+ mis,,uncoded languages,langues non codées
292
+ mac,mk,macedonian,macédonien
293
+ mkh,,mon-khmer languages,môn-khmer
294
+ mlg,mg,malagasy,malgache
295
+ mlt,mt,maltese,maltais
296
+ mnc,,manchu,mandchou
297
+ mni,,manipuri,manipuri
298
+ mno,,manobo languages,manobo
299
+ moh,,mohawk,mohawk
300
+ mon,mn,mongolian,mongol
301
+ mos,,mossi,moré
302
+ mao,mi,maori,maori
303
+ may,ms,malay,malais
304
+ mul,,multiple languages,multilingue
305
+ mun,,munda languages,mounda
306
+ mus,,creek,muskogee
307
+ mwl,,mirandese,mirandais
308
+ mwr,,marwari,marvari
309
+ bur,my,burmese,birman
310
+ myn,,mayan languages,maya
311
+ myv,,erzya,erza
312
+ nah,,nahuatl languages,nahuatl
313
+ nai,,north american indian languages,nord-amérindien
314
+ nap,,neapolitan,napolitain
315
+ nau,na,nauru,nauruan
316
+ nav,nv,navajo,navaho
317
+ nbl,nr,south ndebele,ndébélé du sud
318
+ nde,nd,north ndebele,ndébélé du nord
319
+ ndo,ng,ndonga,ndonga
320
+ nds,,low german|low saxon,bas allemand|bas saxon
321
+ nep,ne,nepali,népalais
322
+ new,,nepal bhasa,nepal bhasa
323
+ nia,,nias,nias
324
+ nic,,niger-kordofanian languages,nigéro-kordofanien
325
+ niu,,niuean,niué
326
+ dut,nl,dutch,néerlandais
327
+ nno,nn,nynorsk norwegian,norvégien nynorsk
328
+ nob,nb,bokmål norwegian,norvégien bokmål
329
+ nog,,nogai,nogaï
330
+ non,,old norse,vieux norrois
331
+ nor,no,norwegian,norvégien
332
+ nqo,,n'ko,n'ko
333
+ nso,,pedi|northern sotho,pedi|sotho du nord
334
+ nub,,nubian languages,nubien
335
+ nwc,,classical newari newari nepal bhasa,newari classique
336
+ nya,ny,chichewa|nyanja,chichewa|nyanja
337
+ nym,,nyamwezi,nyamwezi
338
+ nyn,,nyankole,nyankolé
339
+ nyo,,nyoro,nyoro
340
+ nzi,,nzima,nzema
341
+ oci,oc,occitan,occitan
342
+ oji,oj,ojibwa,ojibwa
343
+ ori,or,oriya,oriya
344
+ orm,om,oromo,galla
345
+ osa,,osage,osage
346
+ oss,os,ossetian,ossète
347
+ ota,,ottoman turkish,turc ottoman
348
+ oto,,otomian languages,otomi
349
+ paa,,papuan languages,papoue
350
+ pag,,pangasinan,pangasinan
351
+ pal,,pahlavi,pahlavi
352
+ pam,,pampanga,pampangan
353
+ pan,pa,panjabi,pendjabi
354
+ pap,,papiamento,papiamento
355
+ pau,,palauan,palau
356
+ peo,,old persian,vieux perse
357
+ per,fa,farsi,persan
358
+ phi,,philippine languages,philippine
359
+ phn,,phoenician,phénicien
360
+ pli,pi,pali,pali
361
+ pny,,pinyin,pinyin
362
+ pol,pl,polish,polonais
363
+ pon,,pohnpeian,pohnpei
364
+ por,pt,portuguese,portugais
365
+ pra,,prakrit languages,prâkrit
366
+ pro,,old provençal occitan,provençal ancien ancien
367
+ pus,ps,pushto,pachto
368
+ qaa-qtz,,reserved for local use,réservée à l'usage local
369
+ que,qu,quechua,quechua
370
+ raj,,rajasthani,rajasthani
371
+ rap,,rapanui,rapanui
372
+ rar,,rarotongan islands maori,rarotonga des îles cook
373
+ roa,,romance languages,romane
374
+ roh,rm,romansh,romanche
375
+ rom,,romany,tsigane
376
+ rum,ro,romanian|moldovan,roumain
377
+ rum,ro,romanian|moldovan,roumain
378
+ run,rn,rundi,rundi
379
+ rup,,aromanian|macedo-romanian,aroumain-roumain
380
+ rus,ru,russian,russe
381
+ sad,,sandawe,sandawe
382
+ sag,sg,sango,sango
383
+ sah,,yakut,iakoute
384
+ sai,,south american indian languages,sud-amérindien
385
+ sal,,salishan languages,salishen
386
+ sam,,samaritan aramaic,samaritain
387
+ san,sa,sanskrit,sanskrit
388
+ sas,,sasak,sasak
389
+ sat,,santali,santal
390
+ scn,,sicilian,sicilien
391
+ sco,,scots,écossais
392
+ sel,,selkup,selkoupe
393
+ sem,,semitic languages,sémitique
394
+ sga,,old irish,irlandais ancien
395
+ sgn,,sign languages,langues des signes
396
+ shn,,shan,chan
397
+ sid,,sidamo,sidamo
398
+ sin,si,sinhala,singhalais
399
+ sio,,siouan languages,sioux
400
+ sit,,sino-tibetan languages,sino-tibétaine
401
+ sla,,slavic languages,slave
402
+ slo,sk,slovak,slovaque
403
+ slv,sl,slovenian,slovène
404
+ sma,,southern sami,sami du sud
405
+ sme,se,northern sami,sami du nord
406
+ smi,,sami languages,same
407
+ smj,,lule sami,sami de lule
408
+ smn,,inari sami,sami d'inari
409
+ smo,sm,samoan,samoan
410
+ sms,,skolt sami,sami skolt
411
+ sna,sn,shona,shona
412
+ snd,sd,sindhi,sindhi
413
+ snk,,soninke,soninké
414
+ sog,,sogdian,sogdien
415
+ som,so,somali,somali
416
+ son,,songhai languages,songhai
417
+ sot,st,southern sotho,sotho du sud
418
+ spa,es,spanish,espagnol
419
+ alb,sq,albanian,albanais
420
+ srd,sc,sardinian,sarde
421
+ srn,,sranan tongo,sranan tongo
422
+ srp,sr,serbian,serbe
423
+ srr,,serer,sérère
424
+ ssa,,nilo-saharan languages,nilo-saharien
425
+ ssw,ss,swati,swati
426
+ suk,,sukuma,sukuma
427
+ sun,su,sundanese,soundanais
428
+ sus,,susu,soussou
429
+ sux,,sumerian,sumérien
430
+ swa,sw,swahili,swahili
431
+ swe,sv,swedish,suédois
432
+ syc,,classical syriac,syriaque classique
433
+ syr,,syriac,syriaque
434
+ tah,ty,tahitian,tahitien
435
+ tai,,tai languages,tai
436
+ tam,ta,tamil,tamoul
437
+ tat,tt,tatar,tatar
438
+ tel,te,telugu,télougou
439
+ tem,,timne,temne
440
+ ter,,tereno,tereno
441
+ tet,,tetum,tetum
442
+ tgk,tg,tajik,tadjik
443
+ tgl,tl,tagalog,tagalog
444
+ tha,th,thai,thaï
445
+ tib,bo,tibetan,tibétain
446
+ tig,,tigre,tigré
447
+ tir,ti,tigrinya,tigrigna
448
+ tiv,,tiv,tiv
449
+ tkl,,tokelau,tokelau
450
+ tlh,,klingon-hol,klingon
451
+ tli,,tlingit,tlingit
452
+ tmh,,tamashek,tamacheq
453
+ tog,,tonga,tonga
454
+ ton,to,tonga,tongan
455
+ tpi,,tok pisin,tok pisin
456
+ tsi,,tsimshian,tsimshian
457
+ tsn,tn,tswana,tswana
458
+ tso,ts,tsonga,tsonga
459
+ tuk,tk,turkmen,turkmène
460
+ tum,,tumbuka,tumbuka
461
+ tup,,tupi languages,tupi
462
+ tur,tr,turkish,turc
463
+ tut,,altaic languages,altaïque
464
+ tvl,,tuvalu,tuvalu
465
+ twi,tw,twi,twi
466
+ tyv,,tuvinian,touva
467
+ udm,,udmurt,oudmourte
468
+ uga,,ugaritic,ougaritique
469
+ uig,ug,uighur,ouïgour
470
+ ukr,uk,ukrainian,ukrainien
471
+ umb,,umbundu,umbundu
472
+ und,,undetermined,indéterminée
473
+ urd,ur,urdu,ourdou
474
+ uzb,uz,uzbek,ouzbek
475
+ vai,,vai,vaï
476
+ ven,ve,venda,venda
477
+ vie,vi,vietnamese,vietnamien
478
+ vol,vo,volapük,volapük
479
+ vot,,votic,vote
480
+ wak,,wakashan languages,wakashane
481
+ wal,,wolaitta,wolaitta
482
+ war,,waray,waray
483
+ was,,washo,washo
484
+ wel,cy,welsh,gallois
485
+ wen,,sorbian languages,sorabe
486
+ wln,wa,walloon,wallon
487
+ wol,wo,wolof,wolof
488
+ xal,,kalmyk,kalmouk
489
+ xho,xh,xhosa,xhosa
490
+ yao,,yao,yao
491
+ yap,,yapese,yapois
492
+ yid,yi,yiddish,yiddish
493
+ yor,yo,yoruba,yoruba
494
+ ypk,,yupik languages,yupik
495
+ zap,,zapotec,zapotèque
496
+ zbl,,blissymbols|bliss,symboles bliss
497
+ zen,,zenaga,zenaga
498
+ zha,za,zhuang,zhuang
499
+ chi,zh,chinese,chinois
500
+ znd,,zande languages,zandé
501
+ zul,zu,zulu,zoulou
502
+ zun,,zuni,zuni
503
+ zxx,,no linguistic content applicable,pas de contenu linguistique applicable
504
+ zza,,zaza|dimli|kirmanjki,zaza|dimli|kirmanjki