kanjidic2 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e22b7026685eb60c115cdcfd59980a822bd000cd
4
+ data.tar.gz: 318fc0ae985f12608a777d5e2bc1e1fb8f02f1e5
5
+ SHA512:
6
+ metadata.gz: e2967b9b8a7d64a774f17495f3c900c3a0b26e2227eca6bbe6889d2a29284aef9410c827a6947e757e4ccfd0928c92a917147cc4579053c47ead220204be61f4
7
+ data.tar.gz: 0b267c5265c0eb534270dcb77064629ec6129cc71aa6732a7616797f1500fbba05c9c3427e246256768373c6c0c948c446e20ca2e4261864dd7b2b410659da0c
@@ -0,0 +1,25 @@
1
+ # Because this is a gem, ignore Gemfile.lock:
2
+
3
+ Gemfile.lock
4
+
5
+ # And because this is Ruby, ignore the following
6
+ # (source: https://github.com/github/gitignore/blob/master/Ruby.gitignore):
7
+
8
+ *.gem
9
+ *.rbc
10
+ .bundle
11
+ .config
12
+ coverage
13
+ InstalledFiles
14
+ lib/bundler/man
15
+ pkg
16
+ rdoc
17
+ spec/reports
18
+ test/tmp
19
+ test/version_tmp
20
+ tmp
21
+
22
+ # YARD artifacts
23
+ .yardoc
24
+ _yardoc
25
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT LICENSE
2
+
3
+ Copyright (c) P. G. <p137@gmx.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,79 @@
1
+ # Kanjidic2
2
+
3
+ This gem makes parsing [KANJIDIC2](http://www.edrdg.org/kanjidic/kanjd2index.html) easier. See example usage for details.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'kanjidic2'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install kanjidic2
20
+
21
+ And use it in your application:
22
+
23
+ ```ruby
24
+ require 'kanjidic2'
25
+ ```
26
+
27
+
28
+ ## Dependencies
29
+
30
+ This gem depends on `nokogiri` for XML parsing.
31
+
32
+ ## Usage
33
+
34
+ Example code:
35
+
36
+ ```ruby
37
+ require "kanjidic2"
38
+
39
+ kanjidic2 = Kanjidic2.new("kanjidic2.xml")
40
+
41
+ kanjidic2.each_character do |character|
42
+ p character
43
+ sleep(5)
44
+ end
45
+ ```
46
+
47
+ Example output:
48
+
49
+ `{"literal"=>"亜", "codepoint"=>{"ucs"=>"4e9c", "jis208"=>"16-01"}, "radical"=>{"classical"=>"7", "nelson_c"=>"1"}, "misc"=>{"grade"=>"8", "stroke_count"=>"7", "variant"=>{"jis208"=>"48-19"}, "freq"=>"1509", "rad_name"=>"", "jlpt"=>"1"}, "dic_number"=>{"nelson_c"=>"43", "nelson_n"=>"81", "halpern_njecd"=>"3540", "halpern_kkd"=>"4354", "halpern_kkld"=>"2204", "halpern_kkld_2ed"=>"2966", "heisig"=>"1809", "heisig6"=>"1950", "gakken"=>"1331", "oneill_names"=>"525", "oneill_kk"=>"1788", "moro"=>{"m_vol"=>"1", "m_page"=>"0525", "value"=>"272"}, "henshall"=>"997", "sh_kk"=>"1616", "sh_kk2"=>"1724", "jf_cards"=>"1032", "tutt_cards"=>"1092", "kanji_in_context"=>"1818", "kodansha_compact"=>"35", "maniette"=>"1827"}, "query_code"=>{"skip"=>"4-7-1", "sh_desc"=>"0a7.14", "four_corner"=>"1010.6", "deroo"=>"3273"}, "reading_meaning"=>{"rmgroup"=>{"reading"=>{"pinyin"=>"ya4", "korean_r"=>"a", "korean_h"=>"아", "vietnam"=>"Á", "ja_on"=>{"on_type"=>nil, "r_status"=>nil, "value"=>"ア"}, "ja_kun"=>{"r_status"=>nil, "value"=>"つ.ぐ"}}, "meaning"=>{"en"=>["Asia", "rank next", "come after", "-ous"], "fr"=>["Asie", "suivant", "sub-", "sous-"], "es"=>["pref. para indicar", "venir después de", "Asia"], "pt"=>["Ásia", "próxima", "o que vem depois", "-ous"]}}, "nanori"=>["や", "つぎ", "つぐ"]}}
50
+ `
51
+
52
+ The output resembles the [DTD](http://www.edrdg.org/kanjidic/kanjidic2_dtdh.html) quite closely. Consult the DTD for details.
53
+
54
+ You can also access the header in its parsed form:
55
+
56
+ ```ruby
57
+ require "kanjidic2"
58
+
59
+ kanjidic2 = Kanjidic2.new("kanjidic2.xml")
60
+
61
+ p kanjidic2.header
62
+
63
+
64
+ ```
65
+
66
+ Example output:
67
+
68
+ `{"file_version"=>"4", "database_version"=>"2017-064", "date_of_creation"=>"2017-03-05"}`
69
+
70
+ ## Contributing
71
+
72
+ Bug reports and pull requests are welcome on GitHub at https://github.com/pgorni/kanjidic2.
73
+
74
+
75
+ ## License
76
+
77
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
78
+
79
+
@@ -0,0 +1,10 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+
4
# Test task: puts lib/ on the load path and picks up every file matching
# test/**/*_test.rb, with verbose output turned off.
Rake::TestTask.new do |test_task|
  test_task.verbose = false
  test_task.pattern = 'test/**/*_test.rb'
  test_task.libs.push('lib')
end

# Running `rake` with no arguments runs the test task.
task default: :test
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'kanjidic2/version'
5
+
6
# Package definition for the kanjidic2 gem.
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name    = "kanjidic2"
  gem.version = Kanjidic2::VERSION
  gem.authors = ["Piotr Górni"]
  gem.email   = ["pgorni@teknik.io"]

  # Descriptive metadata.
  gem.summary     = %q{A KANJIDIC2 toolkit for Ruby}
  gem.description = %q{A simple KANJIDIC2 toolkit for Ruby, based on nokogiri.}
  gem.homepage    = "https://rubygems.org/gems/kanjidic2"
  gem.license     = "MIT"

  # Ship every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are whatever packaged files live under exe/.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only tooling.
  gem.add_development_dependency "bundler", "~> 1.14"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"

  # Runtime dependency.
  gem.add_dependency "nokogiri", "~> 1.7"
end
@@ -0,0 +1,153 @@
1
+ require "nokogiri"
2
+
3
# Reader for a KANJIDIC2 XML file.
#
# The file is parsed with Nokogiri when the object is created. The header and
# each <character> entry are exposed as plain Hashes with String keys whose
# nesting follows the element names of the XML.
#
# Known limitation: within one character, values are keyed by their type
# attribute (cp_type, rad_type, var_type, dr_type, qc_type, r_type). When the
# same type occurs more than once - e.g. several "ja_kun" readings - only the
# last occurrence is kept. Meanings and nanori are collected into Arrays and
# are not affected.
class Kanjidic2
  # Header elements copied into the Hash returned by #header.
  HEADER_FIELDS = %w[file_version database_version date_of_creation].freeze

  # Meaning languages that always get a (possibly empty) list, in this order.
  # Any other m_lang value found in the file gets its list created on demand.
  DEFAULT_MEANING_LANGUAGES = %w[en fr es pt].freeze

  private_constant :HEADER_FIELDS, :DEFAULT_MEANING_LANGUAGES

  # Opens and parses the given KANJIDIC2 XML file.
  #
  # @param filename [String] path of the XML file to load
  def initialize(filename)
    @kanjidic2_file = File.open(filename) { |f| Nokogiri::XML(f) }
  end

  # Yields every <character> entry of the file as a parsed Hash.
  #
  # @yieldparam character [Hash] the parsed entry, see #parse_chr
  # @return [Enumerator] over the parsed entries when no block is given
  def each_character
    return enum_for(:each_character) unless block_given?

    @kanjidic2_file.css("character").each do |character|
      yield parse_chr(character)
    end
  end

  # Returns the parsed <header> of the file.
  #
  # @return [Hash{String => String}] file_version, database_version and
  #   date_of_creation
  def header
    parse_header(@kanjidic2_file)
  end

  private

  # Extracts the text of each HEADER_FIELDS element found under <header>.
  def parse_header(kanjidic2_file)
    header = kanjidic2_file.css("header")
    HEADER_FIELDS.each_with_object({}) do |field, parsed|
      parsed[field] = header.css(field).text
    end
  end

  # Converts one <character> node into a nested Hash.
  def parse_chr(character)
    {
      "literal" => character.css("literal").text,
      "codepoint" => typed_values(character.css("codepoint").css("cp_value"), "cp_type"),
      "radical" => typed_values(character.css("radical").css("rad_value"), "rad_type"),
      "misc" => parse_misc(character.css("misc")),
      "dic_number" => parse_dic_numbers(character.css("dic_number").css("dic_ref")),
      "query_code" => typed_values(character.css("query_code").css("q_code"), "qc_type"),
      "reading_meaning" => parse_reading_meaning(character.css("reading_meaning"))
    }
  end

  # Maps each node's +type_attr+ attribute value to the node's stripped text.
  def typed_values(nodes, type_attr)
    nodes.each_with_object({}) do |node, values|
      values[node[type_attr]] = node.text.strip
    end
  end

  # Parses the <misc> element; absent children yield an empty String.
  def parse_misc(misc)
    {
      "grade" => misc.css("grade").text,
      "stroke_count" => misc.css("stroke_count").text,
      "variant" => typed_values(misc.css("variant"), "var_type"),
      "freq" => misc.css("freq").text,
      "rad_name" => misc.css("rad_name").text,
      "jlpt" => misc.css("jlpt").text
    }
  end

  # Parses <dic_ref> nodes keyed by dr_type. A "moro" reference additionally
  # carries its m_vol and m_page attributes, so it becomes a Hash instead of a
  # plain String.
  def parse_dic_numbers(dic_refs)
    dic_refs.each_with_object({}) do |dic_ref, numbers|
      type = dic_ref["dr_type"]
      value = dic_ref.text.strip
      numbers[type] =
        if type == "moro"
          { "m_vol" => dic_ref["m_vol"], "m_page" => dic_ref["m_page"], "value" => value }
        else
          value
        end
    end
  end

  # Parses <reading_meaning>: the readings and meanings of all <rmgroup>
  # elements merged into one "rmgroup" Hash, plus the list of nanori.
  def parse_reading_meaning(reading_meaning)
    rmgroups = reading_meaning.css("rmgroup")
    {
      "rmgroup" => {
        "reading" => parse_readings(rmgroups.css("reading")),
        "meaning" => parse_meanings(rmgroups.css("meaning"))
      },
      "nanori" => reading_meaning.css("nanori").map { |nanori| nanori.text.strip }
    }
  end

  # Parses <reading> nodes keyed by r_type. "ja_on" and "ja_kun" readings keep
  # their extra attributes in a Hash; every other type is a plain String.
  def parse_readings(readings)
    readings.each_with_object({}) do |reading, parsed|
      type = reading["r_type"]
      value = reading.text.strip
      parsed[type] =
        case type
        when "ja_on"
          { "on_type" => reading["on_type"], "r_status" => reading["r_status"], "value" => value }
        when "ja_kun"
          { "r_status" => reading["r_status"], "value" => value }
        else
          value
        end
    end
  end

  # Groups <meaning> texts by language. A meaning without an m_lang attribute
  # is filed under "en".
  def parse_meanings(meanings)
    parsed = DEFAULT_MEANING_LANGUAGES.each_with_object({}) do |lang, by_lang|
      by_lang[lang] = []
    end

    meanings.each do |meaning|
      lang = meaning["m_lang"] || "en"
      # Create the list on demand so a language outside the defaults is kept
      # instead of raising NoMethodError on nil.
      (parsed[lang] ||= []) << meaning.text.strip
    end

    parsed
  end
end
@@ -0,0 +1,3 @@
1
class Kanjidic2
  # Version of the kanjidic2 gem. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kanjidic2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Piotr Górni
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-03-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.14'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.7'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.7'
69
+ description: A simple KANJIDIC2 toolkit for Ruby, based on nokogiri.
70
+ email:
71
+ - pgorni@teknik.io
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - LICENSE
79
+ - README.md
80
+ - Rakefile
81
+ - kanjidic2.gemspec
82
+ - lib/kanjidic2.rb
83
+ - lib/kanjidic2/version.rb
84
+ homepage: https://rubygems.org/gems/kanjidic2
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.6.10
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: A KANJIDIC2 toolkit for Ruby
108
+ test_files: []