ruby-jdict 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSING +28 -28
- data/README.md +18 -20
- data/Rakefile +41 -30
- data/examples/query.rb +19 -22
- data/lib/ruby-jdict.rb +14 -0
- data/lib/{constants.rb → ruby-jdict/constants.rb} +73 -64
- data/lib/ruby-jdict/convert.rb +33 -0
- data/lib/ruby-jdict/dictionary.rb +59 -0
- data/lib/ruby-jdict/index.rb +151 -0
- data/lib/ruby-jdict/indexer/dictionary_indexer.rb +28 -0
- data/lib/ruby-jdict/indexer/libxml_dictionary_indexer.rb +164 -0
- data/lib/ruby-jdict/indexer/nokogiri_dictionary_indexer.rb +60 -0
- data/lib/ruby-jdict/jdict.rb +2 -0
- data/lib/ruby-jdict/models/entry.rb +64 -0
- data/lib/ruby-jdict/models/sense.rb +81 -0
- data/lib/ruby-jdict/version.rb +3 -3
- data/spec/convert_spec.rb +27 -0
- data/spec/dictionary_spec.rb +113 -113
- data/spec/entry_spec.rb +25 -0
- data/spec/fixtures/feeds/sample_entry.xml +32 -32
- data/spec/index_spec.rb +82 -84
- data/spec/spec_helper.rb +49 -49
- metadata +35 -36
- data/examples/lst.txt +0 -4
- data/lib/configuration.rb +0 -34
- data/lib/dictionaries/jmdict.rb +0 -38
- data/lib/dictionary.rb +0 -90
- data/lib/downloader.rb +0 -42
- data/lib/entry.rb +0 -101
- data/lib/index.rb +0 -305
- data/lib/jdict.rb +0 -20
- data/lib/kana.rb +0 -4
- data/lib/kanji.rb +0 -4
- data/lib/sense.rb +0 -28
- data/lib/unicode.rb +0 -63
- data/spec/configuration_spec.rb +0 -20
- data/spec/jmdict_spec.rb +0 -19
data/lib/unicode.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
module JDict
|
2
|
-
module Unicode
|
3
|
-
# Codepoint ranges for japanese unicode characters (in decimal)
|
4
|
-
# from: http://unicode.org/charts/
|
5
|
-
module CodepointRanges
|
6
|
-
HIRAGANA = 12352..12447
|
7
|
-
KATAKANA = 12448..12543
|
8
|
-
KATAKANA_PHONETIC = 12784..12799
|
9
|
-
HALFWIDTH_KATAKANA = 65280..65519
|
10
|
-
UNIFIED_CJK = 19968..40911
|
11
|
-
UNIFIED_CJK_EXT_A = 13312..19903
|
12
|
-
UNIFIED_CJK_EXT_B = 131072..173791
|
13
|
-
PUNCTUATION = 12288..12351
|
14
|
-
end
|
15
|
-
|
16
|
-
# Get Unicode hex codepoint from a Unicode character
|
17
|
-
def hex_codepoint(unicode_char)
|
18
|
-
unicode_char.unpack("U0U*")[0]
|
19
|
-
end
|
20
|
-
|
21
|
-
# TODO: write unit test with a variety of strings to ensure this method
|
22
|
-
# returns the expected output
|
23
|
-
# Determine the script of the specified string:
|
24
|
-
# :kanji
|
25
|
-
# :kana
|
26
|
-
# :english
|
27
|
-
def script_type?(unicode_string)
|
28
|
-
type = ''
|
29
|
-
|
30
|
-
unicode_string.each_char do |c|
|
31
|
-
code = hex_codepoint(c)
|
32
|
-
#kana
|
33
|
-
if CodepointRanges::HIRAGANA.include?(code) ||
|
34
|
-
CodepointRanges::KATAKANA.include?(code) ||
|
35
|
-
CodepointRanges::KATAKANA_PHONETIC.include?(code) ||
|
36
|
-
CodepointRanges::HALFWIDTH_KATAKANA.include?(code) ||
|
37
|
-
CodepointRanges::PUNCTUATION.include?(code) then
|
38
|
-
type = :kana
|
39
|
-
break
|
40
|
-
#kanji
|
41
|
-
elsif CodepointRanges::UNIFIED_CJK.include?(code) ||
|
42
|
-
CodepointRanges::UNIFIED_CJK_EXT_A.include?(code) ||
|
43
|
-
CodepointRanges::UNIFIED_CJK_EXT_B.include?(code) then
|
44
|
-
type = :kanji
|
45
|
-
#english
|
46
|
-
else
|
47
|
-
type = :english
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
type
|
52
|
-
end
|
53
|
-
|
54
|
-
def japanese?(unicode_string)
|
55
|
-
type = script_type?(unicode_string)
|
56
|
-
type == :kanji || type == :kana
|
57
|
-
end
|
58
|
-
def english?(unicode_string)
|
59
|
-
type = script_type?(unicode_string)
|
60
|
-
type == :english
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
data/spec/configuration_spec.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
require 'configuration'
|
3
|
-
|
4
|
-
module JDict
|
5
|
-
describe Configuration do
|
6
|
-
describe "#debug" do
|
7
|
-
it "default value is false" do
|
8
|
-
Configuration.new.debug = false
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
describe "#debug=" do
|
13
|
-
it "can set value" do
|
14
|
-
config = Configuration.new
|
15
|
-
config.debug = true
|
16
|
-
expect(config.debug).to eq(true)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
data/spec/jmdict_spec.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require BASE_PATH + '/lib/dictionary'
|
3
|
-
require BASE_PATH + '/lib/jmdict'
|
4
|
-
|
5
|
-
module JMDictSpecHelper
|
6
|
-
INDEX_PATH = File.join(BASE_PATH+'/index')
|
7
|
-
end
|
8
|
-
|
9
|
-
describe JDict::JMDict do
|
10
|
-
include JMDictSpecHelper
|
11
|
-
|
12
|
-
before do
|
13
|
-
@jmdict = JDict::JMDict.new(JMDictSpecHelper::INDEX_PATH)
|
14
|
-
end
|
15
|
-
|
16
|
-
it do
|
17
|
-
@jmdict.should be_a_kind_of(JDict::Dictionary)
|
18
|
-
end
|
19
|
-
end
|