langusta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +13 -0
- data/README.rdoc +34 -0
- data/Rakefile +55 -0
- data/VERSION +1 -0
- data/bin/langusta +5 -0
- data/data/messages.properties +128 -0
- data/data/uppercase.bin +0 -0
- data/langusta.gemspec +210 -0
- data/lib/langusta.rb +36 -0
- data/lib/langusta/command.rb +78 -0
- data/lib/langusta/detector.rb +197 -0
- data/lib/langusta/detector_factory.rb +46 -0
- data/lib/langusta/java_property_reader.rb +35 -0
- data/lib/langusta/lang_profile.rb +80 -0
- data/lib/langusta/language.rb +14 -0
- data/lib/langusta/language_detection_facade.rb +24 -0
- data/lib/langusta/n_gram.rb +116 -0
- data/lib/langusta/regex_helper.rb +15 -0
- data/lib/langusta/tag_extractor.rb +39 -0
- data/lib/langusta/ucs2_string.rb +70 -0
- data/lib/langusta/unicode_block.rb +56 -0
- data/profiles/af +1 -0
- data/profiles/ar +1 -0
- data/profiles/bg +1 -0
- data/profiles/bn +1 -0
- data/profiles/cs +1 -0
- data/profiles/da +1 -0
- data/profiles/de +1 -0
- data/profiles/el +1 -0
- data/profiles/en +1 -0
- data/profiles/es +1 -0
- data/profiles/fa +1 -0
- data/profiles/fi +1 -0
- data/profiles/fr +1 -0
- data/profiles/gu +1 -0
- data/profiles/he +1 -0
- data/profiles/hi +1 -0
- data/profiles/hr +1 -0
- data/profiles/hu +1 -0
- data/profiles/id +1 -0
- data/profiles/it +1 -0
- data/profiles/ja +1 -0
- data/profiles/kn +1 -0
- data/profiles/ko +1 -0
- data/profiles/mk +1 -0
- data/profiles/ml +1 -0
- data/profiles/mr +1 -0
- data/profiles/ne +1 -0
- data/profiles/nl +1 -0
- data/profiles/no +1 -0
- data/profiles/pa +1 -0
- data/profiles/pl +1 -0
- data/profiles/pt +1 -0
- data/profiles/ro +1 -0
- data/profiles/ru +1 -0
- data/profiles/sk +1 -0
- data/profiles/so +1 -0
- data/profiles/sq +1 -0
- data/profiles/sv +1 -0
- data/profiles/sw +1 -0
- data/profiles/ta +1 -0
- data/profiles/te +1 -0
- data/profiles/th +1 -0
- data/profiles/tl +1 -0
- data/profiles/tr +1 -0
- data/profiles/uk +1 -0
- data/profiles/ur +1 -0
- data/profiles/vi +1 -0
- data/profiles/zh-cn +1 -0
- data/profiles/zh-tw +1 -0
- data/test/helper.rb +20 -0
- data/test/quality/test_falsified.rb +33 -0
- data/test/test_command.rb +34 -0
- data/test/test_data/af +1 -0
- data/test/test_data/ar +1 -0
- data/test/test_data/bg +32 -0
- data/test/test_data/bn +9 -0
- data/test/test_data/cs +9 -0
- data/test/test_data/da +14 -0
- data/test/test_data/de +4 -0
- data/test/test_data/el +7 -0
- data/test/test_data/en +26 -0
- data/test/test_data/es +4 -0
- data/test/test_data/fa +21 -0
- data/test/test_data/fi +8 -0
- data/test/test_data/fr +13 -0
- data/test/test_data/gu +3 -0
- data/test/test_data/he +20 -0
- data/test/test_data/hi +1 -0
- data/test/test_data/hr +16 -0
- data/test/test_data/hu +6 -0
- data/test/test_data/id +2 -0
- data/test/test_data/it +3 -0
- data/test/test_data/ja +34 -0
- data/test/test_data/kn +14 -0
- data/test/test_data/ko +2 -0
- data/test/test_data/mk +3 -0
- data/test/test_data/ml +1 -0
- data/test/test_data/mr +3 -0
- data/test/test_data/ne +2 -0
- data/test/test_data/nl +1 -0
- data/test/test_data/no +3 -0
- data/test/test_data/pa +1 -0
- data/test/test_data/pl +23 -0
- data/test/test_data/pt +2 -0
- data/test/test_data/ro +2 -0
- data/test/test_data/ru +1 -0
- data/test/test_data/sk +2 -0
- data/test/test_data/so +4 -0
- data/test/test_data/sq +4 -0
- data/test/test_data/sv +3 -0
- data/test/test_data/sw +6 -0
- data/test/test_data/ta +1 -0
- data/test/test_data/te +2 -0
- data/test/test_data/th +3 -0
- data/test/test_data/tl +1 -0
- data/test/test_data/tr +2 -0
- data/test/test_data/uk +3 -0
- data/test/test_data/ur +1 -0
- data/test/test_data/vi +2 -0
- data/test/test_data/zh-tw +3 -0
- data/test/test_detector.rb +52 -0
- data/test/test_detector_factory.rb +16 -0
- data/test/test_java_property_reader.rb +8 -0
- data/test/test_lang_profile.rb +79 -0
- data/test/test_language.rb +15 -0
- data/test/test_language_detection_facade.rb +9 -0
- data/test/test_langusta.rb +25 -0
- data/test/test_n_gram.rb +103 -0
- data/test/test_tag_extractor.rb +71 -0
- data/test/test_ucs2_string.rb +9 -0
- data/test/test_unicode_block.rb +9 -0
- metadata +320 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
require 'test/helper'
|
|
2
|
+
|
|
3
|
+
class UnicodeBlockTest < Test::Unit::TestCase
|
|
4
|
+
def test_upper_case
|
|
5
|
+
["\x00\x47", "\x01\x10", "\x01\x64", "\x03\xd5", "\x04\xa2", "\x10\xc3", "\x21\x60", "\xa7\x60"].each do |cp|
|
|
6
|
+
assert(Langusta::UnicodeBlock.is_upper_case?(cp))
|
|
7
|
+
end
|
|
8
|
+
end
|
|
9
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: langusta
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 27
|
|
5
|
+
prerelease:
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 1
|
|
9
|
+
- 0
|
|
10
|
+
version: 0.1.0
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Jan Szumiec
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2011-04-08 00:00:00 +02:00
|
|
19
|
+
default_executable: langusta
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
type: :runtime
|
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
24
|
+
none: false
|
|
25
|
+
requirements:
|
|
26
|
+
- - "="
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
hash: 19
|
|
29
|
+
segments:
|
|
30
|
+
- 1
|
|
31
|
+
- 1
|
|
32
|
+
- 0
|
|
33
|
+
version: 1.1.0
|
|
34
|
+
name: oniguruma
|
|
35
|
+
version_requirements: *id001
|
|
36
|
+
prerelease: false
|
|
37
|
+
- !ruby/object:Gem::Dependency
|
|
38
|
+
type: :runtime
|
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
40
|
+
none: false
|
|
41
|
+
requirements:
|
|
42
|
+
- - "="
|
|
43
|
+
- !ruby/object:Gem::Version
|
|
44
|
+
hash: 59
|
|
45
|
+
segments:
|
|
46
|
+
- 0
|
|
47
|
+
- 8
|
|
48
|
+
- 2
|
|
49
|
+
version: 0.8.2
|
|
50
|
+
name: yajl-ruby
|
|
51
|
+
version_requirements: *id002
|
|
52
|
+
prerelease: false
|
|
53
|
+
- !ruby/object:Gem::Dependency
|
|
54
|
+
type: :development
|
|
55
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
|
56
|
+
none: false
|
|
57
|
+
requirements:
|
|
58
|
+
- - ~>
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
hash: 23
|
|
61
|
+
segments:
|
|
62
|
+
- 1
|
|
63
|
+
- 0
|
|
64
|
+
- 0
|
|
65
|
+
version: 1.0.0
|
|
66
|
+
name: bundler
|
|
67
|
+
version_requirements: *id003
|
|
68
|
+
prerelease: false
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
type: :development
|
|
71
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
|
72
|
+
none: false
|
|
73
|
+
requirements:
|
|
74
|
+
- - ~>
|
|
75
|
+
- !ruby/object:Gem::Version
|
|
76
|
+
hash: 7
|
|
77
|
+
segments:
|
|
78
|
+
- 1
|
|
79
|
+
- 5
|
|
80
|
+
- 2
|
|
81
|
+
version: 1.5.2
|
|
82
|
+
name: jeweler
|
|
83
|
+
version_requirements: *id004
|
|
84
|
+
prerelease: false
|
|
85
|
+
- !ruby/object:Gem::Dependency
|
|
86
|
+
type: :development
|
|
87
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
|
88
|
+
none: false
|
|
89
|
+
requirements:
|
|
90
|
+
- - ">="
|
|
91
|
+
- !ruby/object:Gem::Version
|
|
92
|
+
hash: 3
|
|
93
|
+
segments:
|
|
94
|
+
- 0
|
|
95
|
+
version: "0"
|
|
96
|
+
name: rcov
|
|
97
|
+
version_requirements: *id005
|
|
98
|
+
prerelease: false
|
|
99
|
+
- !ruby/object:Gem::Dependency
|
|
100
|
+
type: :development
|
|
101
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
|
102
|
+
none: false
|
|
103
|
+
requirements:
|
|
104
|
+
- - ">="
|
|
105
|
+
- !ruby/object:Gem::Version
|
|
106
|
+
hash: 3
|
|
107
|
+
segments:
|
|
108
|
+
- 0
|
|
109
|
+
version: "0"
|
|
110
|
+
name: mocha
|
|
111
|
+
version_requirements: *id006
|
|
112
|
+
prerelease: false
|
|
113
|
+
- !ruby/object:Gem::Dependency
|
|
114
|
+
type: :development
|
|
115
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
|
116
|
+
none: false
|
|
117
|
+
requirements:
|
|
118
|
+
- - ">="
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
hash: 3
|
|
121
|
+
segments:
|
|
122
|
+
- 0
|
|
123
|
+
version: "0"
|
|
124
|
+
name: ruby-debug
|
|
125
|
+
version_requirements: *id007
|
|
126
|
+
prerelease: false
|
|
127
|
+
description: Uses naive bayesian filter.
|
|
128
|
+
email: jan.szumiec@gmail.com
|
|
129
|
+
executables:
|
|
130
|
+
- langusta
|
|
131
|
+
extensions: []
|
|
132
|
+
|
|
133
|
+
extra_rdoc_files:
|
|
134
|
+
- LICENSE.txt
|
|
135
|
+
- README.rdoc
|
|
136
|
+
files:
|
|
137
|
+
- .document
|
|
138
|
+
- Gemfile
|
|
139
|
+
- Gemfile.lock
|
|
140
|
+
- LICENSE.txt
|
|
141
|
+
- README.rdoc
|
|
142
|
+
- Rakefile
|
|
143
|
+
- VERSION
|
|
144
|
+
- bin/langusta
|
|
145
|
+
- data/messages.properties
|
|
146
|
+
- data/uppercase.bin
|
|
147
|
+
- langusta.gemspec
|
|
148
|
+
- lib/langusta.rb
|
|
149
|
+
- lib/langusta/command.rb
|
|
150
|
+
- lib/langusta/detector.rb
|
|
151
|
+
- lib/langusta/detector_factory.rb
|
|
152
|
+
- lib/langusta/java_property_reader.rb
|
|
153
|
+
- lib/langusta/lang_profile.rb
|
|
154
|
+
- lib/langusta/language.rb
|
|
155
|
+
- lib/langusta/language_detection_facade.rb
|
|
156
|
+
- lib/langusta/n_gram.rb
|
|
157
|
+
- lib/langusta/regex_helper.rb
|
|
158
|
+
- lib/langusta/tag_extractor.rb
|
|
159
|
+
- lib/langusta/ucs2_string.rb
|
|
160
|
+
- lib/langusta/unicode_block.rb
|
|
161
|
+
- profiles/af
|
|
162
|
+
- profiles/ar
|
|
163
|
+
- profiles/bg
|
|
164
|
+
- profiles/bn
|
|
165
|
+
- profiles/cs
|
|
166
|
+
- profiles/da
|
|
167
|
+
- profiles/de
|
|
168
|
+
- profiles/el
|
|
169
|
+
- profiles/en
|
|
170
|
+
- profiles/es
|
|
171
|
+
- profiles/fa
|
|
172
|
+
- profiles/fi
|
|
173
|
+
- profiles/fr
|
|
174
|
+
- profiles/gu
|
|
175
|
+
- profiles/he
|
|
176
|
+
- profiles/hi
|
|
177
|
+
- profiles/hr
|
|
178
|
+
- profiles/hu
|
|
179
|
+
- profiles/id
|
|
180
|
+
- profiles/it
|
|
181
|
+
- profiles/ja
|
|
182
|
+
- profiles/kn
|
|
183
|
+
- profiles/ko
|
|
184
|
+
- profiles/mk
|
|
185
|
+
- profiles/ml
|
|
186
|
+
- profiles/mr
|
|
187
|
+
- profiles/ne
|
|
188
|
+
- profiles/nl
|
|
189
|
+
- profiles/no
|
|
190
|
+
- profiles/pa
|
|
191
|
+
- profiles/pl
|
|
192
|
+
- profiles/pt
|
|
193
|
+
- profiles/ro
|
|
194
|
+
- profiles/ru
|
|
195
|
+
- profiles/sk
|
|
196
|
+
- profiles/so
|
|
197
|
+
- profiles/sq
|
|
198
|
+
- profiles/sv
|
|
199
|
+
- profiles/sw
|
|
200
|
+
- profiles/ta
|
|
201
|
+
- profiles/te
|
|
202
|
+
- profiles/th
|
|
203
|
+
- profiles/tl
|
|
204
|
+
- profiles/tr
|
|
205
|
+
- profiles/uk
|
|
206
|
+
- profiles/ur
|
|
207
|
+
- profiles/vi
|
|
208
|
+
- profiles/zh-cn
|
|
209
|
+
- profiles/zh-tw
|
|
210
|
+
- test/helper.rb
|
|
211
|
+
- test/quality/test_falsified.rb
|
|
212
|
+
- test/test_command.rb
|
|
213
|
+
- test/test_data/af
|
|
214
|
+
- test/test_data/ar
|
|
215
|
+
- test/test_data/bg
|
|
216
|
+
- test/test_data/bn
|
|
217
|
+
- test/test_data/cs
|
|
218
|
+
- test/test_data/da
|
|
219
|
+
- test/test_data/de
|
|
220
|
+
- test/test_data/el
|
|
221
|
+
- test/test_data/en
|
|
222
|
+
- test/test_data/es
|
|
223
|
+
- test/test_data/fa
|
|
224
|
+
- test/test_data/fi
|
|
225
|
+
- test/test_data/fr
|
|
226
|
+
- test/test_data/gu
|
|
227
|
+
- test/test_data/he
|
|
228
|
+
- test/test_data/hi
|
|
229
|
+
- test/test_data/hr
|
|
230
|
+
- test/test_data/hu
|
|
231
|
+
- test/test_data/id
|
|
232
|
+
- test/test_data/it
|
|
233
|
+
- test/test_data/ja
|
|
234
|
+
- test/test_data/kn
|
|
235
|
+
- test/test_data/ko
|
|
236
|
+
- test/test_data/mk
|
|
237
|
+
- test/test_data/ml
|
|
238
|
+
- test/test_data/mr
|
|
239
|
+
- test/test_data/ne
|
|
240
|
+
- test/test_data/nl
|
|
241
|
+
- test/test_data/no
|
|
242
|
+
- test/test_data/pa
|
|
243
|
+
- test/test_data/pl
|
|
244
|
+
- test/test_data/pt
|
|
245
|
+
- test/test_data/ro
|
|
246
|
+
- test/test_data/ru
|
|
247
|
+
- test/test_data/sk
|
|
248
|
+
- test/test_data/so
|
|
249
|
+
- test/test_data/sq
|
|
250
|
+
- test/test_data/sv
|
|
251
|
+
- test/test_data/sw
|
|
252
|
+
- test/test_data/ta
|
|
253
|
+
- test/test_data/te
|
|
254
|
+
- test/test_data/th
|
|
255
|
+
- test/test_data/tl
|
|
256
|
+
- test/test_data/tr
|
|
257
|
+
- test/test_data/uk
|
|
258
|
+
- test/test_data/ur
|
|
259
|
+
- test/test_data/vi
|
|
260
|
+
- test/test_data/zh-tw
|
|
261
|
+
- test/test_detector.rb
|
|
262
|
+
- test/test_detector_factory.rb
|
|
263
|
+
- test/test_java_property_reader.rb
|
|
264
|
+
- test/test_lang_profile.rb
|
|
265
|
+
- test/test_language.rb
|
|
266
|
+
- test/test_language_detection_facade.rb
|
|
267
|
+
- test/test_langusta.rb
|
|
268
|
+
- test/test_n_gram.rb
|
|
269
|
+
- test/test_tag_extractor.rb
|
|
270
|
+
- test/test_ucs2_string.rb
|
|
271
|
+
- test/test_unicode_block.rb
|
|
272
|
+
has_rdoc: true
|
|
273
|
+
homepage: http://github.com/jasiek/langusta
|
|
274
|
+
licenses:
|
|
275
|
+
- Apache 2.0
|
|
276
|
+
post_install_message:
|
|
277
|
+
rdoc_options: []
|
|
278
|
+
|
|
279
|
+
require_paths:
|
|
280
|
+
- lib
|
|
281
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
282
|
+
none: false
|
|
283
|
+
requirements:
|
|
284
|
+
- - ">="
|
|
285
|
+
- !ruby/object:Gem::Version
|
|
286
|
+
hash: 3
|
|
287
|
+
segments:
|
|
288
|
+
- 0
|
|
289
|
+
version: "0"
|
|
290
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
291
|
+
none: false
|
|
292
|
+
requirements:
|
|
293
|
+
- - ">="
|
|
294
|
+
- !ruby/object:Gem::Version
|
|
295
|
+
hash: 3
|
|
296
|
+
segments:
|
|
297
|
+
- 0
|
|
298
|
+
version: "0"
|
|
299
|
+
requirements: []
|
|
300
|
+
|
|
301
|
+
rubyforge_project:
|
|
302
|
+
rubygems_version: 1.5.1
|
|
303
|
+
signing_key:
|
|
304
|
+
specification_version: 3
|
|
305
|
+
summary: Language detection library based on http://code.google.com/p/language-detection/.
|
|
306
|
+
test_files:
|
|
307
|
+
- test/helper.rb
|
|
308
|
+
- test/quality/test_falsified.rb
|
|
309
|
+
- test/test_command.rb
|
|
310
|
+
- test/test_detector.rb
|
|
311
|
+
- test/test_detector_factory.rb
|
|
312
|
+
- test/test_java_property_reader.rb
|
|
313
|
+
- test/test_lang_profile.rb
|
|
314
|
+
- test/test_language.rb
|
|
315
|
+
- test/test_language_detection_facade.rb
|
|
316
|
+
- test/test_langusta.rb
|
|
317
|
+
- test/test_n_gram.rb
|
|
318
|
+
- test/test_tag_extractor.rb
|
|
319
|
+
- test/test_ucs2_string.rb
|
|
320
|
+
- test/test_unicode_block.rb
|