jmdict 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +6 -0
- data/LICENSE +21 -0
- data/README.md +74 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/jmdict.gemspec +26 -0
- data/lib/jmdict.rb +100 -0
- data/lib/jmdict/version.rb +3 -0
- metadata +96 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 50e74e443e242733de9d3d9031efeba47e6a7d38
|
|
4
|
+
data.tar.gz: d683f0678b436503ba017551100ce348c751fc6e
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 22abdee7ee5daee1b77d7b5dee2aa998d64cd3f13f73889598753780cae4273109d15cd28376e5c900867540550aa8900559870bfab6a95e8f89d8c6a71356fd
|
|
7
|
+
data.tar.gz: 3b1d66c42fc6a1aef0a69124108d5e1a391125c4a2b3e7458f8dc6ebfb451f17ce2c581a61ab6967af66b6a7bbf62273d1eb82a6e9d9c546072f188245588b8d
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT LICENSE
|
|
2
|
+
|
|
3
|
+
Copyright (c) Ramiro Antonio <ramiro.antonio@outlook.com>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# JMDict Gem
|
|
2
|
+
|
|
3
|
+
This is a simple gem that takes in an XML file containing the [JMDict](http://www.edrdg.org/jmdict/edict_doc.html) Japanese/English dictionary from the Electronic Dictionary Research and Development Group. To spare you from parsing this complex file yourself, the gem returns each entry as a friendlier Ruby Hash so that you can focus on the processing step.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add this line to your application's Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'jmdict'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
$ bundle install
|
|
16
|
+
|
|
17
|
+
Or install it yourself as:
|
|
18
|
+
|
|
19
|
+
$ gem install jmdict
|
|
20
|
+
|
|
21
|
+
And use it in your application:
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
require 'jmdict'
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Dependencies
|
|
28
|
+
|
|
29
|
+
This gem depends on `nokogiri` for XML parsing.
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
The usage is very simple, just do:
|
|
34
|
+
```ruby
|
|
35
|
+
jmdict = JMDict.new("JMdict.xml")
|
|
36
|
+
jmdict.each_entry do |entry|
|
|
37
|
+
# Here you can play with the entry
|
|
38
|
+
end
|
|
39
|
+
```
|
|
40
|
+
Example of an entry hash:
|
|
41
|
+
```
|
|
42
|
+
{"ent_seq"=>1000310,
|
|
43
|
+
"k_ele"=>[{"keb"=>"馬酔木", "ke_inf"=>[], "ke_pri"=>[]}],
|
|
44
|
+
"r_ele"=>[{"reb"=>"あせび", "re_nokanji"=>"", "re_restr"=>[], "re_inf"=>[], "re_pri"=>[]},
|
|
45
|
+
{"reb"=>"あしび", "re_nokanji"=>"", "re_restr"=>[], "re_inf"=>[], "re_pri"=>[]},
|
|
46
|
+
{"reb"=>"あせぼ", "re_nokanji"=>"", "re_restr"=>[], "re_inf"=>[], "re_pri"=>[]},
|
|
47
|
+
{"reb"=>"あせぶ", "re_nokanji"=>"", "re_restr"=>[], "re_inf"=>[], "re_pri"=>[]},
|
|
48
|
+
{"reb"=>"アセビ", "re_nokanji"=>"", "re_restr"=>[], "re_inf"=>[], "re_pri"=>[]}],
|
|
49
|
+
"sense"=>[{"stagk"=>[], "stagr"=>[], "pos"=>["noun (common) (futsuumeishi)"], "xref"=>[], "ant"=>[],
|
|
50
|
+
"field"=>[], "misc"=>["word usually written using kana alone"], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>nil, "gloss"=>"Japanese andromeda (Pieris japonica)"},{"lang"=>nil,"gloss"=>"lily-of-the-valley"}]},
|
|
51
|
+
{"stagk"=>[], "stagr"=>[], "pos"=>[], "xref"=>[], "ant"=>[], "field"=>[], "misc"=>[], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>"dut", "gloss"=>"{plantk.} Japanse rotsheide"}, {"lang"=>"dut", "gloss"=>"Pieris japonica"}]},
|
|
52
|
+
{"stagk"=>[], "stagr"=>[], "pos"=>[], "xref"=>[], "ant"=>[], "field"=>[], "misc"=>[], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>"dut", "gloss"=>"Ashibi {naam van een tanka-tijdschrift, 1903-1908}"}]},
|
|
53
|
+
{"stagk"=>[], "stagr"=>[], "pos"=>[], "xref"=>[], "ant"=>[], "field"=>[], "misc"=>[], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>"dut", "gloss"=>"Ashibi {sedert 1928 de naam van het vanaf 1922 gepubliceerde haiku-tijdschrift Hamayumi 破魔弓}"}, {"lang"=>"dut", "gloss"=>"{plantk.} Japanse rotsheide"}, {"lang"=>"dut", "gloss"=>"Pieris japonica"}]},
|
|
54
|
+
{"stagk"=>[], "stagr"=>[], "pos"=>[], "xref"=>[], "ant"=>[], "field"=>[], "misc"=>[], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>"ger", "gloss"=>"(f) Lavendelheide"}, {"lang"=>"ger", "gloss"=>"Pieris japonica"}]},
|
|
55
|
+
{"stagk"=>[], "stagr"=>[], "pos"=>[], "xref"=>[], "ant"=>[], "field"=>[], "misc"=>[], "s_inf"=>[], "l_source"=>[], "dial"=>[], "gloss"=>[{"lang"=>"rus", "gloss"=>"(см.) あせび"}, {"lang"=>"rus", "gloss"=>"(бот.) подбел, Picris japonicum (D. Don.)"}]}]
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
Please read the [DTD](http://www.edrdg.org/jmdict/dtd-jmdict.xml) for more information on the fields and how to deal with this data. This gem is only a wrapper.
|
|
59
|
+
|
|
60
|
+
## Versioning
|
|
61
|
+
|
|
62
|
+
The version number follows the same pattern as the official DTD revision number, to indicate which revision of the XML file this gem is compatible with.
|
|
63
|
+
|
|
64
|
+
## Performance
|
|
65
|
+
|
|
66
|
+
A word of warning about the memory and time that all this parsing can take. The file itself and the Nokogiri structures take about 2 GB of memory, with or without this gem. In terms of time, there is a notable increase from converting each Node element into a Hash. Using this gem therefore takes more time than dealing with Nokogiri directly, but it is aimed at one-shot processing of the XML file and provides a more uniform way of accessing all the fields.
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
|
71
|
+
|
|
72
|
+
## Contributing
|
|
73
|
+
|
|
74
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/r-antonio/jmdict.
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "jmdict"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/jmdict.gemspec
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
|
|
2
|
+
# Make lib/ loadable so the version constant can be read while building the gem.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "jmdict/version"

# Gem specification for jmdict.
Gem::Specification.new do |spec|
  spec.name          = "jmdict"
  spec.version       = JMDict::VERSION
  spec.authors       = ["Ramiro Antonio"]
  spec.email         = ["ramiro.antonio@outlook.com"]

  spec.summary       = %q{A gem to get parsed entries of the JMDict Japanese dictionary}
  spec.description   = %q{This gem parses the JMDict XML file to get a more friendly hash interface for its entries}
  spec.homepage      = "https://github.com/r-antonio/jmdict"
  # Declared explicitly so the published metadata matches the bundled LICENSE
  # file and the README (both state MIT).
  spec.license       = "MIT"

  # Package every file tracked by git except test/spec/feature directories.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_dependency "nokogiri"
end
|
data/lib/jmdict.rb
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
require "jmdict/version"
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
# Wrapper around a JMdict dictionary XML file that exposes every <entry>
# element as a plain Ruby Hash.
#
# Hash keys mirror the element/attribute names used in the XML; for their
# meaning see the DTD Rev 1.09 (http://www.edrdg.org/jmdict/dtd-jmdict.xml).
class JMDict
  # Opens +filename+ and parses it with Nokogiri; the parsed document is kept
  # for later iteration.
  #
  # @param filename [String] path to the JMdict XML file
  def initialize(filename)
    @jmdict_file = File.open(filename) { |f| Nokogiri::XML(f) }
  end

  # Yields each JMdict/entry node converted to a Hash (see #parse_entry).
  #
  # @yieldparam entry [Hash] the parsed entry
  # @return [Enumerator] when called without a block
  def each_entry
    # Without a block, hand back an Enumerator instead of raising LocalJumpError.
    return enum_for(:each_entry) unless block_given?

    @jmdict_file.xpath('JMdict/entry').each do |e|
      yield(parse_entry(e))
    end
  end

  # Many elements consist of text-only subelements: collects the text of every
  # +tag+ node found under +element+.
  #
  # @param tag [String] name of the subelement to look for
  # @param element [#css] node to search in
  # @return [Array<String>] one string per matching node (empty if none)
  def fill_elems(tag, element)
    element.css(tag).map(&:text)
  end

  # Converts one <entry> node into a Hash with the keys "ent_seq", "k_ele",
  # "r_ele" and "sense".
  #
  # @param entry [#css] the entry node
  # @return [Hash]
  def parse_entry(entry)
    {
      'ent_seq' => entry.css('ent_seq').text.to_i,
      'k_ele'   => entry.css('k_ele').map { |elem| parse_k_ele(elem) },
      'r_ele'   => entry.css('r_ele').map { |elem| parse_r_ele(elem) },
      'sense'   => entry.css('sense').map { |elem| parse_sense(elem) }
    }
  end

  private

  # Builds the Hash for one <k_ele> node.
  def parse_k_ele(elem)
    {
      'keb'    => elem.css('keb').text,
      'ke_inf' => fill_elems('ke_inf', elem),
      'ke_pri' => fill_elems('ke_pri', elem)
    }
  end

  # Builds the Hash for one <r_ele> node.
  def parse_r_ele(elem)
    {
      'reb'        => elem.css('reb').text,
      're_nokanji' => elem.css('re_nokanji').text,
      're_restr'   => fill_elems('re_restr', elem),
      're_inf'     => fill_elems('re_inf', elem),
      're_pri'     => fill_elems('re_pri', elem)
    }
  end

  # Builds the Hash for one <sense> node.
  def parse_sense(node)
    {
      'stagk'    => fill_elems('stagk', node),
      'stagr'    => fill_elems('stagr', node),
      'pos'      => fill_elems('pos', node),
      'xref'     => fill_elems('xref', node),
      'ant'      => fill_elems('ant', node),
      'field'    => fill_elems('field', node),
      'misc'     => fill_elems('misc', node),
      's_inf'    => fill_elems('s_inf', node),
      'l_source' => node.css('l_source').map { |ls| parse_l_source(ls) },
      'dial'     => fill_elems('dial', node),
      'gloss'    => node.css('gloss').map { |g| parse_gloss(g) }
    }
  end

  # Builds the Hash for one <l_source> node.
  #
  # ls_type and ls_wasei are XML attributes, so they are read with [] (nil
  # when the attribute is absent) rather than called as methods; the source
  # word itself is the node's text content.
  def parse_l_source(ls)
    {
      'lang'      => ls.lang,
      'ls_type'   => ls['ls_type'],
      'ls_wasei'  => ls['ls_wasei'],
      'ls_source' => ls.text
    }
  end

  # Builds the Hash for one <gloss> node. The "g_gend", "g_type" and "pri"
  # keys are only present when the XML provides them.
  def parse_gloss(g)
    gloss = { 'lang' => g.lang }

    gloss['g_gend'] = g['g_gend'] if g.key?('g_gend')
    gloss['g_type'] = g['g_type'] if g.key?('g_type')

    # The gloss could have a text PCData or a 'pri' element.
    # Currently in rev 1.09 there is no 'pri' with content at all. Just for the sake of DTD.
    pri = g.at_css('pri')
    gloss['pri'] = pri.text if pri

    gloss['gloss'] = g.text
    gloss
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: jmdict
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.9.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Ramiro Antonio
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-03-18 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.16'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.16'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '10.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '10.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: nokogiri
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
description: This gem parses the JMDict XML file to get a more friendly hash interface
|
|
56
|
+
for its entries
|
|
57
|
+
email:
|
|
58
|
+
- ramiro.antonio@outlook.com
|
|
59
|
+
executables: []
|
|
60
|
+
extensions: []
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- ".gitignore"
|
|
64
|
+
- Gemfile
|
|
65
|
+
- LICENSE
|
|
66
|
+
- README.md
|
|
67
|
+
- Rakefile
|
|
68
|
+
- bin/console
|
|
69
|
+
- bin/setup
|
|
70
|
+
- jmdict.gemspec
|
|
71
|
+
- lib/jmdict.rb
|
|
72
|
+
- lib/jmdict/version.rb
|
|
73
|
+
homepage: https://github.com/r-antonio/jmdict
|
|
74
|
+
licenses: []
|
|
75
|
+
metadata: {}
|
|
76
|
+
post_install_message:
|
|
77
|
+
rdoc_options: []
|
|
78
|
+
require_paths:
|
|
79
|
+
- lib
|
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
81
|
+
requirements:
|
|
82
|
+
- - ">="
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: '0'
|
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
requirements: []
|
|
91
|
+
rubyforge_project:
|
|
92
|
+
rubygems_version: 2.5.1
|
|
93
|
+
signing_key:
|
|
94
|
+
specification_version: 4
|
|
95
|
+
summary: A gem to get parsed entries of the JMDict Japanese dictionary
|
|
96
|
+
test_files: []
|