langa 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. data/COPYING +674 -0
  2. data/README +69 -0
  3. data/bin/langa +169 -0
  4. data/examples/afrikaans_1953_utf8.txt +1000 -0
  5. data/examples/albanian_utf8.txt +1000 -0
  6. data/examples/amharic_utf8.txt +1000 -0
  7. data/examples/arabic_svd_utf8.txt +1000 -0
  8. data/examples/armenian_western_1853_utf8.txt +1000 -0
  9. data/examples/asv_utf8.txt +1000 -0
  10. data/examples/basque_1571_utf8.txt +1000 -0
  11. data/examples/breton_utf8.txt +1000 -0
  12. data/examples/chinese_ncv_s_utf8.txt +1000 -0
  13. data/examples/chinese_ncv_utf8.txt +1000 -0
  14. data/examples/chinese_union_s_utf8.txt +1000 -0
  15. data/examples/chinese_union_utf8.txt +1000 -0
  16. data/examples/coptic_nt_utf8.txt +1000 -0
  17. data/examples/croatian_utf8.txt +1000 -0
  18. data/examples/czech_bkr_utf8.txt +1000 -0
  19. data/examples/danish_utf8.txt +1000 -0
  20. data/examples/dutch_svv_utf8.txt +1000 -0
  21. data/examples/esperanto_utf8.txt +1000 -0
  22. data/examples/estonian_utf8.txt +1000 -0
  23. data/examples/finnish_pr_1992_utf8.txt +1000 -0
  24. data/examples/french_ostervald_1996_utf8.txt +1000 -0
  25. data/examples/german_schlachter_1951_utf8.txt +1000 -0
  26. data/examples/greek_byzantine_2000_utf8.txt +1000 -0
  27. data/examples/greek_modern_utf8.txt +1000 -0
  28. data/examples/hebrew_modern_utf8.txt +1000 -0
  29. data/examples/hungarian_karoli_utf8.txt +1000 -0
  30. data/examples/italian_riveduta_1927_utf8.txt +1000 -0
  31. data/examples/kabyle_nt_utf8.txt +1000 -0
  32. data/examples/kjv_apocrypha_utf8.txt +1000 -0
  33. data/examples/korean_utf8.txt +1000 -0
  34. data/examples/latin_vulgata_clementina_utf8.txt +1000 -0
  35. data/examples/latvian_nt_utf8.txt +1000 -0
  36. data/examples/lithuanian_utf8.txt +1000 -0
  37. data/examples/manx_gaelic_utf8.txt +1000 -0
  38. data/examples/maori_utf8.txt +1000 -0
  39. data/examples/myanmar_judson_1835_utf8.txt +1000 -0
  40. data/examples/norwegian_utf8.txt +1000 -0
  41. data/examples/peshitta_utf8.txt +1000 -0
  42. data/examples/portuguese_utf8.txt +1000 -0
  43. data/examples/romani_utf8.txt +1000 -0
  44. data/examples/romanian_cornilescu_utf8.txt +1000 -0
  45. data/examples/russian_makarij_utf8.txt +1000 -0
  46. data/examples/spanish_reina_valera_1909_utf8.txt +1000 -0
  47. data/examples/swedish_1917_utf8.txt +1000 -0
  48. data/examples/tagalog_1905_utf8.txt +1000 -0
  49. data/examples/thai_kjv_utf8.txt +1000 -0
  50. data/examples/turkish_nt_utf8.txt +1000 -0
  51. data/examples/turkish_utf8.txt +1000 -0
  52. data/examples/ukrainian_1871_utf8.txt +1000 -0
  53. data/examples/vietnamese_1934_utf8.txt +1000 -0
  54. data/examples/wolof_utf8.txt +1000 -0
  55. data/examples/xhosa_utf8.txt +1000 -0
  56. data/lib/langa.rb +35 -0
  57. data/lib/langa/dna.rb +209 -0
  58. data/lib/langa/file.rb +97 -0
  59. data/lib/langa/langa.dna +406 -0
  60. data/lib/langa/languageanalyzer.rb +134 -0
  61. data/lib/langa/languages.rb +147 -0
  62. data/lib/langa/randomtestfiles.rb +140 -0
  63. data/lib/langa/utilities.rb +53 -0
  64. data/test/tc_file.rb +47 -0
  65. data/test/tc_languages.rb +69 -0
  66. data/test/tc_utilities.rb +42 -0
  67. data/unicode/CaseFolding.txt +1065 -0
  68. data/unicode/CaseFolding.txt.webloc +8 -0
  69. data/unicode/Index of -Public-MAPPINGS.webloc +0 -0
  70. data/unicode/mappings/8859-1.TXT +303 -0
  71. data/unicode/mappings/8859-10.TXT +303 -0
  72. data/unicode/mappings/8859-11.TXT +297 -0
  73. data/unicode/mappings/8859-13.TXT +299 -0
  74. data/unicode/mappings/8859-14.TXT +301 -0
  75. data/unicode/mappings/8859-15.TXT +303 -0
  76. data/unicode/mappings/8859-16.TXT +299 -0
  77. data/unicode/mappings/8859-2.TXT +303 -0
  78. data/unicode/mappings/8859-3.TXT +296 -0
  79. data/unicode/mappings/8859-4.TXT +303 -0
  80. data/unicode/mappings/8859-5.TXT +303 -0
  81. data/unicode/mappings/8859-6.TXT +260 -0
  82. data/unicode/mappings/8859-7.TXT +308 -0
  83. data/unicode/mappings/8859-8.TXT +270 -0
  84. data/unicode/mappings/8859-9.TXT +307 -0
  85. data/unicode/mappings/ATARIST.TXT +313 -0
  86. data/unicode/mappings/CP037.TXT +275 -0
  87. data/unicode/mappings/CP1006.TXT +302 -0
  88. data/unicode/mappings/CP1026.TXT +275 -0
  89. data/unicode/mappings/CP1250.TXT +274 -0
  90. data/unicode/mappings/CP1251.TXT +274 -0
  91. data/unicode/mappings/CP1252.TXT +274 -0
  92. data/unicode/mappings/CP1253.TXT +274 -0
  93. data/unicode/mappings/CP1254.TXT +274 -0
  94. data/unicode/mappings/CP1255.TXT +274 -0
  95. data/unicode/mappings/CP1256.TXT +274 -0
  96. data/unicode/mappings/CP1257.TXT +274 -0
  97. data/unicode/mappings/CP1258.TXT +274 -0
  98. data/unicode/mappings/CP424.TXT +304 -0
  99. data/unicode/mappings/CP437.TXT +274 -0
  100. data/unicode/mappings/CP500.TXT +275 -0
  101. data/unicode/mappings/CP737.TXT +274 -0
  102. data/unicode/mappings/CP775.TXT +275 -0
  103. data/unicode/mappings/CP850.TXT +274 -0
  104. data/unicode/mappings/CP852.TXT +274 -0
  105. data/unicode/mappings/CP855.TXT +275 -0
  106. data/unicode/mappings/CP856.TXT +303 -0
  107. data/unicode/mappings/CP857.TXT +275 -0
  108. data/unicode/mappings/CP860.TXT +275 -0
  109. data/unicode/mappings/CP861.TXT +275 -0
  110. data/unicode/mappings/CP862.TXT +275 -0
  111. data/unicode/mappings/CP863.TXT +275 -0
  112. data/unicode/mappings/CP864.TXT +275 -0
  113. data/unicode/mappings/CP865.TXT +275 -0
  114. data/unicode/mappings/CP866.TXT +275 -0
  115. data/unicode/mappings/CP869.TXT +275 -0
  116. data/unicode/mappings/CP874.TXT +274 -0
  117. data/unicode/mappings/CP875.TXT +275 -0
  118. data/unicode/mappings/KOI8-R.TXT +302 -0
  119. data/unicode/mappings/NEXTSTEP.TXT +173 -0
  120. data/unicode/mappings/ROMAN.TXT +275 -0
  121. data/unicode/mappings/US-ASCII-QUOTES.TXT +198 -0
  122. metadata +180 -0
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: langa
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - John Vorhauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-01-13 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: langa@vorhauer.de
18
+ executables:
19
+ - langa
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - bin/langa
26
+ - lib/langa
27
+ - lib/langa/dna.rb
28
+ - lib/langa/file.rb
29
+ - lib/langa/langa.dna
30
+ - lib/langa/languageanalyzer.rb
31
+ - lib/langa/languages.rb
32
+ - lib/langa/randomtestfiles.rb
33
+ - lib/langa/utilities.rb
34
+ - lib/langa.rb
35
+ - test/tc_file.rb
36
+ - test/tc_languages.rb
37
+ - test/tc_utilities.rb
38
+ - examples/afrikaans_1953_utf8.txt
39
+ - examples/albanian_utf8.txt
40
+ - examples/amharic_utf8.txt
41
+ - examples/arabic_svd_utf8.txt
42
+ - examples/armenian_western_1853_utf8.txt
43
+ - examples/asv_utf8.txt
44
+ - examples/basque_1571_utf8.txt
45
+ - examples/breton_utf8.txt
46
+ - examples/chinese_ncv_s_utf8.txt
47
+ - examples/chinese_ncv_utf8.txt
48
+ - examples/chinese_union_s_utf8.txt
49
+ - examples/chinese_union_utf8.txt
50
+ - examples/coptic_nt_utf8.txt
51
+ - examples/croatian_utf8.txt
52
+ - examples/czech_bkr_utf8.txt
53
+ - examples/danish_utf8.txt
54
+ - examples/dutch_svv_utf8.txt
55
+ - examples/esperanto_utf8.txt
56
+ - examples/estonian_utf8.txt
57
+ - examples/finnish_pr_1992_utf8.txt
58
+ - examples/french_ostervald_1996_utf8.txt
59
+ - examples/german_schlachter_1951_utf8.txt
60
+ - examples/greek_byzantine_2000_utf8.txt
61
+ - examples/greek_modern_utf8.txt
62
+ - examples/hebrew_modern_utf8.txt
63
+ - examples/hungarian_karoli_utf8.txt
64
+ - examples/italian_riveduta_1927_utf8.txt
65
+ - examples/kabyle_nt_utf8.txt
66
+ - examples/kjv_apocrypha_utf8.txt
67
+ - examples/korean_utf8.txt
68
+ - examples/latin_vulgata_clementina_utf8.txt
69
+ - examples/latvian_nt_utf8.txt
70
+ - examples/lithuanian_utf8.txt
71
+ - examples/manx_gaelic_utf8.txt
72
+ - examples/maori_utf8.txt
73
+ - examples/myanmar_judson_1835_utf8.txt
74
+ - examples/norwegian_utf8.txt
75
+ - examples/peshitta_utf8.txt
76
+ - examples/portuguese_utf8.txt
77
+ - examples/romani_utf8.txt
78
+ - examples/romanian_cornilescu_utf8.txt
79
+ - examples/russian_makarij_utf8.txt
80
+ - examples/spanish_reina_valera_1909_utf8.txt
81
+ - examples/swedish_1917_utf8.txt
82
+ - examples/tagalog_1905_utf8.txt
83
+ - examples/thai_kjv_utf8.txt
84
+ - examples/turkish_nt_utf8.txt
85
+ - examples/turkish_utf8.txt
86
+ - examples/ukrainian_1871_utf8.txt
87
+ - examples/vietnamese_1934_utf8.txt
88
+ - examples/wolof_utf8.txt
89
+ - examples/xhosa_utf8.txt
90
+ - unicode/CaseFolding.txt
91
+ - unicode/CaseFolding.txt.webloc
92
+ - unicode/Index of -Public-MAPPINGS.webloc
93
+ - unicode/mappings
94
+ - unicode/mappings/8859-1.TXT
95
+ - unicode/mappings/8859-10.TXT
96
+ - unicode/mappings/8859-11.TXT
97
+ - unicode/mappings/8859-13.TXT
98
+ - unicode/mappings/8859-14.TXT
99
+ - unicode/mappings/8859-15.TXT
100
+ - unicode/mappings/8859-16.TXT
101
+ - unicode/mappings/8859-2.TXT
102
+ - unicode/mappings/8859-3.TXT
103
+ - unicode/mappings/8859-4.TXT
104
+ - unicode/mappings/8859-5.TXT
105
+ - unicode/mappings/8859-6.TXT
106
+ - unicode/mappings/8859-7.TXT
107
+ - unicode/mappings/8859-8.TXT
108
+ - unicode/mappings/8859-9.TXT
109
+ - unicode/mappings/ATARIST.TXT
110
+ - unicode/mappings/CP037.TXT
111
+ - unicode/mappings/CP1006.TXT
112
+ - unicode/mappings/CP1026.TXT
113
+ - unicode/mappings/CP1250.TXT
114
+ - unicode/mappings/CP1251.TXT
115
+ - unicode/mappings/CP1252.TXT
116
+ - unicode/mappings/CP1253.TXT
117
+ - unicode/mappings/CP1254.TXT
118
+ - unicode/mappings/CP1255.TXT
119
+ - unicode/mappings/CP1256.TXT
120
+ - unicode/mappings/CP1257.TXT
121
+ - unicode/mappings/CP1258.TXT
122
+ - unicode/mappings/CP424.TXT
123
+ - unicode/mappings/CP437.TXT
124
+ - unicode/mappings/CP500.TXT
125
+ - unicode/mappings/CP737.TXT
126
+ - unicode/mappings/CP775.TXT
127
+ - unicode/mappings/CP850.TXT
128
+ - unicode/mappings/CP852.TXT
129
+ - unicode/mappings/CP855.TXT
130
+ - unicode/mappings/CP856.TXT
131
+ - unicode/mappings/CP857.TXT
132
+ - unicode/mappings/CP860.TXT
133
+ - unicode/mappings/CP861.TXT
134
+ - unicode/mappings/CP862.TXT
135
+ - unicode/mappings/CP863.TXT
136
+ - unicode/mappings/CP864.TXT
137
+ - unicode/mappings/CP865.TXT
138
+ - unicode/mappings/CP866.TXT
139
+ - unicode/mappings/CP869.TXT
140
+ - unicode/mappings/CP874.TXT
141
+ - unicode/mappings/CP875.TXT
142
+ - unicode/mappings/KOI8-R.TXT
143
+ - unicode/mappings/NEXTSTEP.TXT
144
+ - unicode/mappings/ROMAN.TXT
145
+ - unicode/mappings/US-ASCII-QUOTES.TXT
146
+ - COPYING
147
+ - README
148
+ has_rdoc: true
149
+ homepage: http://langa.rubyforge.org
150
+ post_install_message:
151
+ rdoc_options:
152
+ - --main
153
+ - README
154
+ - --charset
155
+ - UTF-8
156
+ require_paths:
157
+ - lib
158
+ required_ruby_version: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: "0"
163
+ version:
164
+ required_rubygems_version: !ruby/object:Gem::Requirement
165
+ requirements:
166
+ - - ">="
167
+ - !ruby/object:Gem::Version
168
+ version: "0"
169
+ version:
170
+ requirements: []
171
+
172
+ rubyforge_project: langa
173
+ rubygems_version: 1.0.1
174
+ signing_key:
175
+ specification_version: 2
176
+ summary: Automatic natural language recognition from arbitrary textfiles
177
+ test_files:
178
+ - test/tc_file.rb
179
+ - test/tc_languages.rb
180
+ - test/tc_utilities.rb