langa 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122)
  1. data/COPYING +674 -0
  2. data/README +69 -0
  3. data/bin/langa +169 -0
  4. data/examples/afrikaans_1953_utf8.txt +1000 -0
  5. data/examples/albanian_utf8.txt +1000 -0
  6. data/examples/amharic_utf8.txt +1000 -0
  7. data/examples/arabic_svd_utf8.txt +1000 -0
  8. data/examples/armenian_western_1853_utf8.txt +1000 -0
  9. data/examples/asv_utf8.txt +1000 -0
  10. data/examples/basque_1571_utf8.txt +1000 -0
  11. data/examples/breton_utf8.txt +1000 -0
  12. data/examples/chinese_ncv_s_utf8.txt +1000 -0
  13. data/examples/chinese_ncv_utf8.txt +1000 -0
  14. data/examples/chinese_union_s_utf8.txt +1000 -0
  15. data/examples/chinese_union_utf8.txt +1000 -0
  16. data/examples/coptic_nt_utf8.txt +1000 -0
  17. data/examples/croatian_utf8.txt +1000 -0
  18. data/examples/czech_bkr_utf8.txt +1000 -0
  19. data/examples/danish_utf8.txt +1000 -0
  20. data/examples/dutch_svv_utf8.txt +1000 -0
  21. data/examples/esperanto_utf8.txt +1000 -0
  22. data/examples/estonian_utf8.txt +1000 -0
  23. data/examples/finnish_pr_1992_utf8.txt +1000 -0
  24. data/examples/french_ostervald_1996_utf8.txt +1000 -0
  25. data/examples/german_schlachter_1951_utf8.txt +1000 -0
  26. data/examples/greek_byzantine_2000_utf8.txt +1000 -0
  27. data/examples/greek_modern_utf8.txt +1000 -0
  28. data/examples/hebrew_modern_utf8.txt +1000 -0
  29. data/examples/hungarian_karoli_utf8.txt +1000 -0
  30. data/examples/italian_riveduta_1927_utf8.txt +1000 -0
  31. data/examples/kabyle_nt_utf8.txt +1000 -0
  32. data/examples/kjv_apocrypha_utf8.txt +1000 -0
  33. data/examples/korean_utf8.txt +1000 -0
  34. data/examples/latin_vulgata_clementina_utf8.txt +1000 -0
  35. data/examples/latvian_nt_utf8.txt +1000 -0
  36. data/examples/lithuanian_utf8.txt +1000 -0
  37. data/examples/manx_gaelic_utf8.txt +1000 -0
  38. data/examples/maori_utf8.txt +1000 -0
  39. data/examples/myanmar_judson_1835_utf8.txt +1000 -0
  40. data/examples/norwegian_utf8.txt +1000 -0
  41. data/examples/peshitta_utf8.txt +1000 -0
  42. data/examples/portuguese_utf8.txt +1000 -0
  43. data/examples/romani_utf8.txt +1000 -0
  44. data/examples/romanian_cornilescu_utf8.txt +1000 -0
  45. data/examples/russian_makarij_utf8.txt +1000 -0
  46. data/examples/spanish_reina_valera_1909_utf8.txt +1000 -0
  47. data/examples/swedish_1917_utf8.txt +1000 -0
  48. data/examples/tagalog_1905_utf8.txt +1000 -0
  49. data/examples/thai_kjv_utf8.txt +1000 -0
  50. data/examples/turkish_nt_utf8.txt +1000 -0
  51. data/examples/turkish_utf8.txt +1000 -0
  52. data/examples/ukrainian_1871_utf8.txt +1000 -0
  53. data/examples/vietnamese_1934_utf8.txt +1000 -0
  54. data/examples/wolof_utf8.txt +1000 -0
  55. data/examples/xhosa_utf8.txt +1000 -0
  56. data/lib/langa.rb +35 -0
  57. data/lib/langa/dna.rb +209 -0
  58. data/lib/langa/file.rb +97 -0
  59. data/lib/langa/langa.dna +406 -0
  60. data/lib/langa/languageanalyzer.rb +134 -0
  61. data/lib/langa/languages.rb +147 -0
  62. data/lib/langa/randomtestfiles.rb +140 -0
  63. data/lib/langa/utilities.rb +53 -0
  64. data/test/tc_file.rb +47 -0
  65. data/test/tc_languages.rb +69 -0
  66. data/test/tc_utilities.rb +42 -0
  67. data/unicode/CaseFolding.txt +1065 -0
  68. data/unicode/CaseFolding.txt.webloc +8 -0
  69. data/unicode/Index of -Public-MAPPINGS.webloc +0 -0
  70. data/unicode/mappings/8859-1.TXT +303 -0
  71. data/unicode/mappings/8859-10.TXT +303 -0
  72. data/unicode/mappings/8859-11.TXT +297 -0
  73. data/unicode/mappings/8859-13.TXT +299 -0
  74. data/unicode/mappings/8859-14.TXT +301 -0
  75. data/unicode/mappings/8859-15.TXT +303 -0
  76. data/unicode/mappings/8859-16.TXT +299 -0
  77. data/unicode/mappings/8859-2.TXT +303 -0
  78. data/unicode/mappings/8859-3.TXT +296 -0
  79. data/unicode/mappings/8859-4.TXT +303 -0
  80. data/unicode/mappings/8859-5.TXT +303 -0
  81. data/unicode/mappings/8859-6.TXT +260 -0
  82. data/unicode/mappings/8859-7.TXT +308 -0
  83. data/unicode/mappings/8859-8.TXT +270 -0
  84. data/unicode/mappings/8859-9.TXT +307 -0
  85. data/unicode/mappings/ATARIST.TXT +313 -0
  86. data/unicode/mappings/CP037.TXT +275 -0
  87. data/unicode/mappings/CP1006.TXT +302 -0
  88. data/unicode/mappings/CP1026.TXT +275 -0
  89. data/unicode/mappings/CP1250.TXT +274 -0
  90. data/unicode/mappings/CP1251.TXT +274 -0
  91. data/unicode/mappings/CP1252.TXT +274 -0
  92. data/unicode/mappings/CP1253.TXT +274 -0
  93. data/unicode/mappings/CP1254.TXT +274 -0
  94. data/unicode/mappings/CP1255.TXT +274 -0
  95. data/unicode/mappings/CP1256.TXT +274 -0
  96. data/unicode/mappings/CP1257.TXT +274 -0
  97. data/unicode/mappings/CP1258.TXT +274 -0
  98. data/unicode/mappings/CP424.TXT +304 -0
  99. data/unicode/mappings/CP437.TXT +274 -0
  100. data/unicode/mappings/CP500.TXT +275 -0
  101. data/unicode/mappings/CP737.TXT +274 -0
  102. data/unicode/mappings/CP775.TXT +275 -0
  103. data/unicode/mappings/CP850.TXT +274 -0
  104. data/unicode/mappings/CP852.TXT +274 -0
  105. data/unicode/mappings/CP855.TXT +275 -0
  106. data/unicode/mappings/CP856.TXT +303 -0
  107. data/unicode/mappings/CP857.TXT +275 -0
  108. data/unicode/mappings/CP860.TXT +275 -0
  109. data/unicode/mappings/CP861.TXT +275 -0
  110. data/unicode/mappings/CP862.TXT +275 -0
  111. data/unicode/mappings/CP863.TXT +275 -0
  112. data/unicode/mappings/CP864.TXT +275 -0
  113. data/unicode/mappings/CP865.TXT +275 -0
  114. data/unicode/mappings/CP866.TXT +275 -0
  115. data/unicode/mappings/CP869.TXT +275 -0
  116. data/unicode/mappings/CP874.TXT +274 -0
  117. data/unicode/mappings/CP875.TXT +275 -0
  118. data/unicode/mappings/KOI8-R.TXT +302 -0
  119. data/unicode/mappings/NEXTSTEP.TXT +173 -0
  120. data/unicode/mappings/ROMAN.TXT +275 -0
  121. data/unicode/mappings/US-ASCII-QUOTES.TXT +198 -0
  122. metadata +180 -0
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: langa
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - John Vorhauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-01-13 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: langa@vorhauer.de
18
+ executables:
19
+ - langa
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - bin/langa
26
+ - lib/langa
27
+ - lib/langa/dna.rb
28
+ - lib/langa/file.rb
29
+ - lib/langa/langa.dna
30
+ - lib/langa/languageanalyzer.rb
31
+ - lib/langa/languages.rb
32
+ - lib/langa/randomtestfiles.rb
33
+ - lib/langa/utilities.rb
34
+ - lib/langa.rb
35
+ - test/tc_file.rb
36
+ - test/tc_languages.rb
37
+ - test/tc_utilities.rb
38
+ - examples/afrikaans_1953_utf8.txt
39
+ - examples/albanian_utf8.txt
40
+ - examples/amharic_utf8.txt
41
+ - examples/arabic_svd_utf8.txt
42
+ - examples/armenian_western_1853_utf8.txt
43
+ - examples/asv_utf8.txt
44
+ - examples/basque_1571_utf8.txt
45
+ - examples/breton_utf8.txt
46
+ - examples/chinese_ncv_s_utf8.txt
47
+ - examples/chinese_ncv_utf8.txt
48
+ - examples/chinese_union_s_utf8.txt
49
+ - examples/chinese_union_utf8.txt
50
+ - examples/coptic_nt_utf8.txt
51
+ - examples/croatian_utf8.txt
52
+ - examples/czech_bkr_utf8.txt
53
+ - examples/danish_utf8.txt
54
+ - examples/dutch_svv_utf8.txt
55
+ - examples/esperanto_utf8.txt
56
+ - examples/estonian_utf8.txt
57
+ - examples/finnish_pr_1992_utf8.txt
58
+ - examples/french_ostervald_1996_utf8.txt
59
+ - examples/german_schlachter_1951_utf8.txt
60
+ - examples/greek_byzantine_2000_utf8.txt
61
+ - examples/greek_modern_utf8.txt
62
+ - examples/hebrew_modern_utf8.txt
63
+ - examples/hungarian_karoli_utf8.txt
64
+ - examples/italian_riveduta_1927_utf8.txt
65
+ - examples/kabyle_nt_utf8.txt
66
+ - examples/kjv_apocrypha_utf8.txt
67
+ - examples/korean_utf8.txt
68
+ - examples/latin_vulgata_clementina_utf8.txt
69
+ - examples/latvian_nt_utf8.txt
70
+ - examples/lithuanian_utf8.txt
71
+ - examples/manx_gaelic_utf8.txt
72
+ - examples/maori_utf8.txt
73
+ - examples/myanmar_judson_1835_utf8.txt
74
+ - examples/norwegian_utf8.txt
75
+ - examples/peshitta_utf8.txt
76
+ - examples/portuguese_utf8.txt
77
+ - examples/romani_utf8.txt
78
+ - examples/romanian_cornilescu_utf8.txt
79
+ - examples/russian_makarij_utf8.txt
80
+ - examples/spanish_reina_valera_1909_utf8.txt
81
+ - examples/swedish_1917_utf8.txt
82
+ - examples/tagalog_1905_utf8.txt
83
+ - examples/thai_kjv_utf8.txt
84
+ - examples/turkish_nt_utf8.txt
85
+ - examples/turkish_utf8.txt
86
+ - examples/ukrainian_1871_utf8.txt
87
+ - examples/vietnamese_1934_utf8.txt
88
+ - examples/wolof_utf8.txt
89
+ - examples/xhosa_utf8.txt
90
+ - unicode/CaseFolding.txt
91
+ - unicode/CaseFolding.txt.webloc
92
+ - unicode/Index of -Public-MAPPINGS.webloc
93
+ - unicode/mappings
94
+ - unicode/mappings/8859-1.TXT
95
+ - unicode/mappings/8859-10.TXT
96
+ - unicode/mappings/8859-11.TXT
97
+ - unicode/mappings/8859-13.TXT
98
+ - unicode/mappings/8859-14.TXT
99
+ - unicode/mappings/8859-15.TXT
100
+ - unicode/mappings/8859-16.TXT
101
+ - unicode/mappings/8859-2.TXT
102
+ - unicode/mappings/8859-3.TXT
103
+ - unicode/mappings/8859-4.TXT
104
+ - unicode/mappings/8859-5.TXT
105
+ - unicode/mappings/8859-6.TXT
106
+ - unicode/mappings/8859-7.TXT
107
+ - unicode/mappings/8859-8.TXT
108
+ - unicode/mappings/8859-9.TXT
109
+ - unicode/mappings/ATARIST.TXT
110
+ - unicode/mappings/CP037.TXT
111
+ - unicode/mappings/CP1006.TXT
112
+ - unicode/mappings/CP1026.TXT
113
+ - unicode/mappings/CP1250.TXT
114
+ - unicode/mappings/CP1251.TXT
115
+ - unicode/mappings/CP1252.TXT
116
+ - unicode/mappings/CP1253.TXT
117
+ - unicode/mappings/CP1254.TXT
118
+ - unicode/mappings/CP1255.TXT
119
+ - unicode/mappings/CP1256.TXT
120
+ - unicode/mappings/CP1257.TXT
121
+ - unicode/mappings/CP1258.TXT
122
+ - unicode/mappings/CP424.TXT
123
+ - unicode/mappings/CP437.TXT
124
+ - unicode/mappings/CP500.TXT
125
+ - unicode/mappings/CP737.TXT
126
+ - unicode/mappings/CP775.TXT
127
+ - unicode/mappings/CP850.TXT
128
+ - unicode/mappings/CP852.TXT
129
+ - unicode/mappings/CP855.TXT
130
+ - unicode/mappings/CP856.TXT
131
+ - unicode/mappings/CP857.TXT
132
+ - unicode/mappings/CP860.TXT
133
+ - unicode/mappings/CP861.TXT
134
+ - unicode/mappings/CP862.TXT
135
+ - unicode/mappings/CP863.TXT
136
+ - unicode/mappings/CP864.TXT
137
+ - unicode/mappings/CP865.TXT
138
+ - unicode/mappings/CP866.TXT
139
+ - unicode/mappings/CP869.TXT
140
+ - unicode/mappings/CP874.TXT
141
+ - unicode/mappings/CP875.TXT
142
+ - unicode/mappings/KOI8-R.TXT
143
+ - unicode/mappings/NEXTSTEP.TXT
144
+ - unicode/mappings/ROMAN.TXT
145
+ - unicode/mappings/US-ASCII-QUOTES.TXT
146
+ - COPYING
147
+ - README
148
+ has_rdoc: true
149
+ homepage: http://langa.rubyforge.org
150
+ post_install_message:
151
+ rdoc_options:
152
+ - --main
153
+ - README
154
+ - --charset
155
+ - UTF-8
156
+ require_paths:
157
+ - lib
158
+ required_ruby_version: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: "0"
163
+ version:
164
+ required_rubygems_version: !ruby/object:Gem::Requirement
165
+ requirements:
166
+ - - ">="
167
+ - !ruby/object:Gem::Version
168
+ version: "0"
169
+ version:
170
+ requirements: []
171
+
172
+ rubyforge_project: langa
173
+ rubygems_version: 1.0.1
174
+ signing_key:
175
+ specification_version: 2
176
+ summary: Automatic natural language recognition from arbitrary textfiles
177
+ test_files:
178
+ - test/tc_file.rb
179
+ - test/tc_languages.rb
180
+ - test/tc_utilities.rb