ruby-jdict 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSING +28 -28
- data/README.md +18 -20
- data/Rakefile +41 -30
- data/examples/query.rb +19 -22
- data/lib/ruby-jdict.rb +14 -0
- data/lib/{constants.rb → ruby-jdict/constants.rb} +73 -64
- data/lib/ruby-jdict/convert.rb +33 -0
- data/lib/ruby-jdict/dictionary.rb +59 -0
- data/lib/ruby-jdict/index.rb +151 -0
- data/lib/ruby-jdict/indexer/dictionary_indexer.rb +28 -0
- data/lib/ruby-jdict/indexer/libxml_dictionary_indexer.rb +164 -0
- data/lib/ruby-jdict/indexer/nokogiri_dictionary_indexer.rb +60 -0
- data/lib/ruby-jdict/jdict.rb +2 -0
- data/lib/ruby-jdict/models/entry.rb +64 -0
- data/lib/ruby-jdict/models/sense.rb +81 -0
- data/lib/ruby-jdict/version.rb +3 -3
- data/spec/convert_spec.rb +27 -0
- data/spec/dictionary_spec.rb +113 -113
- data/spec/entry_spec.rb +25 -0
- data/spec/fixtures/feeds/sample_entry.xml +32 -32
- data/spec/index_spec.rb +82 -84
- data/spec/spec_helper.rb +49 -49
- metadata +35 -36
- data/examples/lst.txt +0 -4
- data/lib/configuration.rb +0 -34
- data/lib/dictionaries/jmdict.rb +0 -38
- data/lib/dictionary.rb +0 -90
- data/lib/downloader.rb +0 -42
- data/lib/entry.rb +0 -101
- data/lib/index.rb +0 -305
- data/lib/jdict.rb +0 -20
- data/lib/kana.rb +0 -4
- data/lib/kanji.rb +0 -4
- data/lib/sense.rb +0 -28
- data/lib/unicode.rb +0 -63
- data/spec/configuration_spec.rb +0 -20
- data/spec/jmdict_spec.rb +0 -19
data/lib/unicode.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
module JDict
  # Mixin that classifies a string's script (:kana, :kanji or :english) by
  # inspecting the Unicode codepoint of each of its characters.
  module Unicode
    # Codepoint ranges for japanese unicode characters (in decimal)
    # from: http://unicode.org/charts/
    module CodepointRanges
      HIRAGANA = 12352..12447           # U+3040..U+309F
      KATAKANA = 12448..12543           # U+30A0..U+30FF
      KATAKANA_PHONETIC = 12784..12799  # U+31F0..U+31FF
      # NOTE(review): this range is U+FF00..U+FFEF, i.e. the whole
      # "Halfwidth and Fullwidth Forms" block, so fullwidth latin letters and
      # digits match it too. The value is kept unchanged for compatibility.
      HALFWIDTH_KATAKANA = 65280..65519
      UNIFIED_CJK = 19968..40911        # U+4E00..U+9FCF
      UNIFIED_CJK_EXT_A = 13312..19903  # U+3400..U+4DBF
      UNIFIED_CJK_EXT_B = 131072..173791 # U+20000..U+2A6DF
      PUNCTUATION = 12288..12351        # U+3000..U+303F

      # Ranges whose characters make a string count as :kana. Japanese
      # punctuation is deliberately grouped with kana here.
      KANA_LIKE = [
        HIRAGANA, KATAKANA, KATAKANA_PHONETIC, HALFWIDTH_KATAKANA, PUNCTUATION
      ].freeze

      # Ranges whose characters make a string count as :kanji.
      KANJI_LIKE = [UNIFIED_CJK, UNIFIED_CJK_EXT_A, UNIFIED_CJK_EXT_B].freeze
    end

    # Get the Unicode codepoint of the first character of a string.
    #
    # Despite the method name, the result is a plain Integer (not a hex
    # string). Returns nil when the string is empty.
    def hex_codepoint(unicode_char)
      unicode_char.unpack("U0U*")[0]
    end

    # TODO: write unit test with a variety of strings to ensure this method
    #       returns the expected output
    #
    # Determine the script of the specified string:
    #   :kana    - the string contains at least one kana or Japanese
    #              punctuation character
    #   :kanji   - no kana, but at least one CJK ideograph
    #   :english - only characters outside the ranges above
    #
    # Returns '' (an empty String) when the string has no characters.
    #
    # The precedence is kana > kanji > english regardless of character order.
    # A kanji classification is never downgraded by a later non-Japanese
    # character, so "漢a" and "a漢" both yield :kanji.
    def script_type?(unicode_string)
      type = ''

      unicode_string.each_char do |c|
        code = hex_codepoint(c)

        # kana wins outright, no need to look at the remaining characters
        return :kana if CodepointRanges::KANA_LIKE.any? { |range| range.cover?(code) }

        if CodepointRanges::KANJI_LIKE.any? { |range| range.cover?(code) }
          type = :kanji
        elsif type != :kanji
          # only fall back to :english while no kanji has been seen
          type = :english
        end
      end

      type
    end

    # True when the string is classified as :kanji or :kana.
    def japanese?(unicode_string)
      type = script_type?(unicode_string)
      type == :kanji || type == :kana
    end

    # True when the string is classified as :english.
    def english?(unicode_string)
      type = script_type?(unicode_string)
      type == :english
    end
  end
end
|
data/spec/configuration_spec.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
require 'configuration'
|
3
|
-
|
4
|
-
module JDict
  # Specs for the debug flag accessor pair on JDict::Configuration.
  describe Configuration do
    describe "#debug" do
      it "default value is false" do
        # Assert the default instead of assigning it: the previous body was
        # `Configuration.new.debug = false`, which could never fail.
        expect(Configuration.new.debug).to eq(false)
      end
    end

    describe "#debug=" do
      it "can set value" do
        config = Configuration.new
        config.debug = true
        expect(config.debug).to eq(true)
      end
    end
  end
end
|
data/spec/jmdict_spec.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require BASE_PATH + '/lib/dictionary'
|
3
|
-
require BASE_PATH + '/lib/jmdict'
|
4
|
-
|
5
|
-
# Shared constants for the JMDict specs.
module JMDictSpecHelper
  # Path of the index handed to JDict::JMDict.new in the specs. The path
  # segments are passed separately so File.join inserts the separator.
  INDEX_PATH = File.join(BASE_PATH, 'index')
end
|
8
|
-
|
9
|
-
# Checks that a JMDict built from the spec index path is a Dictionary.
describe JDict::JMDict do
  include JMDictSpecHelper

  before do
    @jmdict = JDict::JMDict.new(JMDictSpecHelper::INDEX_PATH)
  end

  it "is a kind of JDict::Dictionary" do
    # expect-style assertion instead of the legacy `should` syntax
    expect(@jmdict).to be_a_kind_of(JDict::Dictionary)
  end
end
|