eiwa 0.0.2 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -8
- data/.standard.yml +1 -0
- data/Gemfile +5 -1
- data/Gemfile.lock +61 -35
- data/README.md +22 -13
- data/eiwa.gemspec +1 -6
- data/lib/eiwa/jmdict/doc.rb +85 -0
- data/lib/eiwa/jmdict/entities.rb +180 -0
- data/lib/eiwa/kanjidic/doc.rb +43 -0
- data/lib/eiwa/parses_file.rb +35 -0
- data/lib/eiwa/tag/antonym.rb +2 -2
- data/lib/eiwa/tag/any.rb +1 -1
- data/lib/eiwa/tag/bag.rb +21 -0
- data/lib/eiwa/tag/character.rb +24 -0
- data/lib/eiwa/tag/cross_reference.rb +3 -3
- data/lib/eiwa/tag/definition.rb +2 -2
- data/lib/eiwa/tag/entity.rb +2 -4
- data/lib/eiwa/tag/entry.rb +0 -2
- data/lib/eiwa/tag/list.rb +18 -0
- data/lib/eiwa/tag/meaning.rb +0 -2
- data/lib/eiwa/tag/other.rb +5 -3
- data/lib/eiwa/tag/reading.rb +0 -2
- data/lib/eiwa/tag/reading_meaning.rb +11 -0
- data/lib/eiwa/tag/source_language.rb +2 -2
- data/lib/eiwa/tag/spelling.rb +0 -2
- data/lib/eiwa/version.rb +1 -1
- data/lib/eiwa.rb +19 -7
- metadata +19 -83
- data/lib/eiwa/jmdict_doc.rb +0 -93
- data/lib/eiwa/jmdict_entities.rb +0 -178
- data/lib/eiwa/parses_jmdict_file.rb +0 -21
@@ -12,7 +12,7 @@ module Eiwa
|
|
12
12
|
def end_self
|
13
13
|
parts = @characters.split("・")
|
14
14
|
@text = parts.first
|
15
|
-
@reading = parts[1]
|
15
|
+
@reading = parts[1..].find { |part| /[^0-9]/.match(part) }
|
16
16
|
@sense_ordinal = parts.find { |part| /^[0-9]+$/.match(part) }&.to_i
|
17
17
|
end
|
18
18
|
|
@@ -21,10 +21,10 @@ module Eiwa
|
|
21
21
|
@reading == other.reading &&
|
22
22
|
@sense_ordinal == other.sense_ordinal
|
23
23
|
end
|
24
|
-
|
24
|
+
alias_method :==, :eql?
|
25
25
|
|
26
26
|
def hash
|
27
|
-
@text
|
27
|
+
[@text, @reading, @sense_ordinal].hash
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
data/lib/eiwa/tag/definition.rb
CHANGED
@@ -35,10 +35,10 @@ module Eiwa
|
|
35
35
|
@gender == other.gender &&
|
36
36
|
@type == other.type
|
37
37
|
end
|
38
|
-
|
38
|
+
alias_method :==, :eql?
|
39
39
|
|
40
40
|
def hash
|
41
|
-
@text
|
41
|
+
[@text, @language, @gender, @type].hash
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
data/lib/eiwa/tag/entity.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require_relative "any"
|
2
|
-
|
3
1
|
module Eiwa
|
4
2
|
module Tag
|
5
3
|
class Entity < Any
|
@@ -19,10 +17,10 @@ module Eiwa
|
|
19
17
|
@code == other.code &&
|
20
18
|
@text == other.text
|
21
19
|
end
|
22
|
-
|
20
|
+
alias_method :==, :eql?
|
23
21
|
|
24
22
|
def hash
|
25
|
-
@code
|
23
|
+
[@code, @text].hash
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
data/lib/eiwa/tag/entry.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Eiwa
|
2
|
+
module Tag
|
3
|
+
# For containers of lists or repeated elements
|
4
|
+
class List < Any
|
5
|
+
Item = Struct.new(:name, :attrs, :text, keyword_init: true)
|
6
|
+
|
7
|
+
attr_reader :items
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@items = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def end_child(child)
|
14
|
+
@items << Item.new(name: child.tag_name, attrs: child.attrs, text: child.text)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/eiwa/tag/meaning.rb
CHANGED
data/lib/eiwa/tag/other.rb
CHANGED
data/lib/eiwa/tag/reading.rb
CHANGED
@@ -23,10 +23,10 @@ module Eiwa
|
|
23
23
|
@wasei == other.wasei &&
|
24
24
|
@type == other.type
|
25
25
|
end
|
26
|
-
|
26
|
+
alias_method :==, :eql?
|
27
27
|
|
28
28
|
def hash
|
29
|
-
@text
|
29
|
+
[@text, @language, @wasei, @type].hash
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
data/lib/eiwa/tag/spelling.rb
CHANGED
data/lib/eiwa/version.rb
CHANGED
data/lib/eiwa.rb
CHANGED
@@ -1,15 +1,27 @@
|
|
1
1
|
require "eiwa/version"
|
2
|
-
|
2
|
+
|
3
|
+
require "eiwa/tag/any"
|
4
|
+
require "eiwa/tag/character"
|
5
|
+
require "eiwa/tag/bag"
|
6
|
+
require "eiwa/tag/list"
|
7
|
+
require "eiwa/tag/reading_meaning"
|
8
|
+
require "eiwa/tag/entry"
|
9
|
+
require "eiwa/tag/spelling"
|
10
|
+
require "eiwa/tag/reading"
|
11
|
+
require "eiwa/tag/meaning"
|
12
|
+
require "eiwa/tag/entity"
|
13
|
+
require "eiwa/tag/cross_reference"
|
14
|
+
require "eiwa/tag/antonym"
|
15
|
+
require "eiwa/tag/source_language"
|
16
|
+
require "eiwa/tag/definition"
|
17
|
+
require "eiwa/tag/other"
|
18
|
+
|
19
|
+
require "eiwa/parses_file"
|
3
20
|
|
4
21
|
module Eiwa
|
5
22
|
class Error < StandardError; end
|
6
23
|
|
7
24
|
def self.parse_file(filename, type: :jmdict_e, &each_entry_block)
|
8
|
-
|
9
|
-
when :jmdict_e
|
10
|
-
ParsesJmdictFile.new.call(filename, each_entry_block)
|
11
|
-
else
|
12
|
-
raise Eiwa::Error.new("Unknown file type: #{type}")
|
13
|
-
end
|
25
|
+
ParsesFile.new.call(filename, type, each_entry_block)
|
14
26
|
end
|
15
27
|
end
|
metadata
CHANGED
@@ -1,100 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eiwa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Searls
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.17'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.17'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '13.0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '13.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: minitest
|
57
15
|
requirement: !ruby/object:Gem::Requirement
|
58
16
|
requirements:
|
59
17
|
- - "~>"
|
60
18
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
62
|
-
type: :
|
19
|
+
version: 1.15.5
|
20
|
+
type: :runtime
|
63
21
|
prerelease: false
|
64
22
|
version_requirements: !ruby/object:Gem::Requirement
|
65
23
|
requirements:
|
66
24
|
- - "~>"
|
67
25
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
69
|
-
|
70
|
-
name: standard
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: pry
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
description:
|
26
|
+
version: 1.15.5
|
27
|
+
description:
|
98
28
|
email:
|
99
29
|
- searls@gmail.com
|
100
30
|
executables: []
|
@@ -103,6 +33,7 @@ extra_rdoc_files: []
|
|
103
33
|
files:
|
104
34
|
- ".github/workflows/ruby.yml"
|
105
35
|
- ".gitignore"
|
36
|
+
- ".standard.yml"
|
106
37
|
- Gemfile
|
107
38
|
- Gemfile.lock
|
108
39
|
- LICENSE.txt
|
@@ -112,18 +43,23 @@ files:
|
|
112
43
|
- bin/setup
|
113
44
|
- eiwa.gemspec
|
114
45
|
- lib/eiwa.rb
|
115
|
-
- lib/eiwa/jmdict_doc.rb
|
116
|
-
- lib/eiwa/jmdict_entities.rb
|
117
|
-
- lib/eiwa/parses_jmdict_file.rb
|
46
|
+
- lib/eiwa/jmdict/doc.rb
|
47
|
+
- lib/eiwa/jmdict/entities.rb
|
48
|
+
- lib/eiwa/kanjidic/doc.rb
|
49
|
+
- lib/eiwa/parses_file.rb
|
118
50
|
- lib/eiwa/tag/antonym.rb
|
119
51
|
- lib/eiwa/tag/any.rb
|
52
|
+
- lib/eiwa/tag/bag.rb
|
53
|
+
- lib/eiwa/tag/character.rb
|
120
54
|
- lib/eiwa/tag/cross_reference.rb
|
121
55
|
- lib/eiwa/tag/definition.rb
|
122
56
|
- lib/eiwa/tag/entity.rb
|
123
57
|
- lib/eiwa/tag/entry.rb
|
58
|
+
- lib/eiwa/tag/list.rb
|
124
59
|
- lib/eiwa/tag/meaning.rb
|
125
60
|
- lib/eiwa/tag/other.rb
|
126
61
|
- lib/eiwa/tag/reading.rb
|
62
|
+
- lib/eiwa/tag/reading_meaning.rb
|
127
63
|
- lib/eiwa/tag/source_language.rb
|
128
64
|
- lib/eiwa/tag/spelling.rb
|
129
65
|
- lib/eiwa/version.rb
|
@@ -133,7 +69,7 @@ homepage: https://github.com/searls/eiwa
|
|
133
69
|
licenses:
|
134
70
|
- MIT
|
135
71
|
metadata: {}
|
136
|
-
post_install_message:
|
72
|
+
post_install_message:
|
137
73
|
rdoc_options: []
|
138
74
|
require_paths:
|
139
75
|
- lib
|
@@ -148,8 +84,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
84
|
- !ruby/object:Gem::Version
|
149
85
|
version: '0'
|
150
86
|
requirements: []
|
151
|
-
rubygems_version: 3.
|
152
|
-
signing_key:
|
87
|
+
rubygems_version: 3.5.4
|
88
|
+
signing_key:
|
153
89
|
specification_version: 4
|
154
90
|
summary: Parses the JMDict Japanese-English dictionary
|
155
91
|
test_files: []
|
data/lib/eiwa/jmdict_doc.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require_relative "tag/entry"
|
2
|
-
require_relative "tag/spelling"
|
3
|
-
require_relative "tag/reading"
|
4
|
-
require_relative "tag/meaning"
|
5
|
-
require_relative "tag/entity"
|
6
|
-
require_relative "tag/cross_reference"
|
7
|
-
require_relative "tag/antonym"
|
8
|
-
require_relative "tag/source_language"
|
9
|
-
require_relative "tag/definition"
|
10
|
-
require_relative "tag/other"
|
11
|
-
|
12
|
-
require_relative "jmdict_entities"
|
13
|
-
|
14
|
-
module Eiwa
|
15
|
-
TAGS = {
|
16
|
-
"entry" => Tag::Entry,
|
17
|
-
"k_ele" => Tag::Spelling,
|
18
|
-
"r_ele" => Tag::Reading,
|
19
|
-
"sense" => Tag::Meaning,
|
20
|
-
"pos" => Tag::Entity,
|
21
|
-
"misc" => Tag::Entity,
|
22
|
-
"dial" => Tag::Entity,
|
23
|
-
"field" => Tag::Entity,
|
24
|
-
"ke_inf" => Tag::Entity,
|
25
|
-
"re_inf" => Tag::Entity,
|
26
|
-
"xref" => Tag::CrossReference,
|
27
|
-
"ant" => Tag::Antonym,
|
28
|
-
"lsource" => Tag::SourceLanguage,
|
29
|
-
"gloss" => Tag::Definition
|
30
|
-
}
|
31
|
-
|
32
|
-
class JmdictDoc < Nokogiri::XML::SAX::Document
|
33
|
-
def initialize(each_entry_block)
|
34
|
-
@each_entry_block = each_entry_block
|
35
|
-
end
|
36
|
-
|
37
|
-
def start_document
|
38
|
-
end
|
39
|
-
|
40
|
-
def end_document
|
41
|
-
end
|
42
|
-
|
43
|
-
def start_element(name, attrs)
|
44
|
-
parent = @current
|
45
|
-
@current = (TAGS[name] || Tag::Other).new
|
46
|
-
@current.start(name, attrs, parent)
|
47
|
-
end
|
48
|
-
|
49
|
-
def end_element(name)
|
50
|
-
raise Eiwa::Error.new("Parsing error. Expected <#{@current.tag_name}> to close before <#{name}>") if @current.tag_name != name
|
51
|
-
ending = @current
|
52
|
-
ending.end_self
|
53
|
-
if ending.is_a?(Tag::Entry)
|
54
|
-
@each_entry_block&.call(ending)
|
55
|
-
end
|
56
|
-
|
57
|
-
@current = ending.parent
|
58
|
-
@current&.end_child(ending)
|
59
|
-
end
|
60
|
-
|
61
|
-
def characters(s)
|
62
|
-
@current.add_characters(s)
|
63
|
-
end
|
64
|
-
|
65
|
-
# def comment string
|
66
|
-
# puts "comment #{string}"
|
67
|
-
# end
|
68
|
-
|
69
|
-
# def warning string
|
70
|
-
# puts "warning #{string}"
|
71
|
-
# end
|
72
|
-
|
73
|
-
def error(msg)
|
74
|
-
if (matches = msg.match(/Entity '([\S]+)' not defined/))
|
75
|
-
# See: http://github.com/sparklemotion/nokogiri/issues/1926
|
76
|
-
code = matches[1]
|
77
|
-
@current.set_entity(code, JMDICT_ENTITIES[code])
|
78
|
-
elsif msg == "Detected an entity reference loop\n"
|
79
|
-
# Do nothing and hope this does not matter.
|
80
|
-
else
|
81
|
-
raise Eiwa::Error.new("Parsing error: #{msg}")
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
# def cdata_block string
|
86
|
-
# puts "cdata_block #{string}"
|
87
|
-
# end
|
88
|
-
|
89
|
-
# def processing_instruction name, content
|
90
|
-
# puts "processing_instruction #{name}, #{content}"
|
91
|
-
# end
|
92
|
-
end
|
93
|
-
end
|
data/lib/eiwa/jmdict_entities.rb
DELETED
@@ -1,178 +0,0 @@
|
|
1
|
-
module Eiwa
|
2
|
-
JMDICT_ENTITIES = {
|
3
|
-
"Buddh" => "Buddhist term",
|
4
|
-
"MA" => "martial arts term",
|
5
|
-
"Shinto" => "Shinto term",
|
6
|
-
"X" => "rude or X-rated term (not displayed in educational software)",
|
7
|
-
"abbr" => "abbreviation",
|
8
|
-
"adj-f" => "noun or verb acting prenominally",
|
9
|
-
"adj-i" => "adjective (keiyoushi)",
|
10
|
-
"adj-ix" => "adjective (keiyoushi) - yoi/ii class",
|
11
|
-
"adj-kari" => "`kari' adjective (archaic)",
|
12
|
-
"adj-ku" => "`ku' adjective (archaic)",
|
13
|
-
"adj-na" => "adjectival nouns or quasi-adjectives (keiyodoshi)",
|
14
|
-
"adj-nari" => "archaic/formal form of na-adjective",
|
15
|
-
"adj-no" => "nouns which may take the genitive case particle `no'",
|
16
|
-
"adj-pn" => "pre-noun adjectival (rentaishi)",
|
17
|
-
"adj-shiku" => "`shiku' adjective (archaic)",
|
18
|
-
"adj-t" => "`taru' adjective",
|
19
|
-
"adv" => "adverb (fukushi)",
|
20
|
-
"adv-to" => "adverb taking the `to' particle",
|
21
|
-
"anat" => "anatomical term",
|
22
|
-
"arch" => "archaism",
|
23
|
-
"archit" => "architecture term",
|
24
|
-
"astron" => "astronomy, etc. term",
|
25
|
-
"ateji" => "ateji (phonetic) reading",
|
26
|
-
"aux" => "auxiliary",
|
27
|
-
"aux-adj" => "auxiliary adjective",
|
28
|
-
"aux-v" => "auxiliary verb",
|
29
|
-
"baseb" => "baseball term",
|
30
|
-
"biol" => "biology term",
|
31
|
-
"bot" => "botany term",
|
32
|
-
"bus" => "business term",
|
33
|
-
"chem" => "chemistry term",
|
34
|
-
"chn" => "children's language",
|
35
|
-
"col" => "colloquialism",
|
36
|
-
"comp" => "computer terminology",
|
37
|
-
"conj" => "conjunction",
|
38
|
-
"cop" => "copula",
|
39
|
-
"cop-da" => "copula",
|
40
|
-
"ctr" => "counter",
|
41
|
-
"derog" => "derogatory",
|
42
|
-
"eK" => "exclusively kanji",
|
43
|
-
"econ" => "economics term",
|
44
|
-
"ek" => "exclusively kana",
|
45
|
-
"engr" => "engineering term",
|
46
|
-
"exp" => "expressions (phrases, clauses, etc.)",
|
47
|
-
"fam" => "familiar language",
|
48
|
-
"fem" => "female term or language",
|
49
|
-
"finc" => "finance term",
|
50
|
-
"food" => "food term",
|
51
|
-
"geol" => "geology, etc. term",
|
52
|
-
"geom" => "geometry term",
|
53
|
-
"gikun" => "gikun (meaning as reading) or jukujikun (special kanji reading)",
|
54
|
-
"hob" => "Hokkaido-ben",
|
55
|
-
"hon" => "honorific or respectful (sonkeigo) language",
|
56
|
-
"hum" => "humble (kenjougo) language",
|
57
|
-
"iK" => "word containing irregular kanji usage",
|
58
|
-
"id" => "idiomatic expression",
|
59
|
-
"ik" => "word containing irregular kana usage",
|
60
|
-
"int" => "interjection (kandoushi)",
|
61
|
-
"io" => "irregular okurigana usage",
|
62
|
-
"iv" => "irregular verb",
|
63
|
-
"joc" => "jocular, humorous term",
|
64
|
-
"ksb" => "Kansai-ben",
|
65
|
-
"ktb" => "Kantou-ben",
|
66
|
-
"kyb" => "Kyoto-ben",
|
67
|
-
"kyu" => "Kyuushuu-ben",
|
68
|
-
"law" => "law, etc. term",
|
69
|
-
"ling" => "linguistics terminology",
|
70
|
-
"m-sl" => "manga slang",
|
71
|
-
"mahj" => "mahjong term",
|
72
|
-
"male" => "male term or language",
|
73
|
-
"male-sl" => "male slang",
|
74
|
-
"math" => "mathematics",
|
75
|
-
"med" => "medicine, etc. term",
|
76
|
-
"mil" => "military",
|
77
|
-
"music" => "music term",
|
78
|
-
"n" => "noun (common) (futsuumeishi)",
|
79
|
-
"n-adv" => "adverbial noun (fukushitekimeishi)",
|
80
|
-
"n-pr" => "proper noun",
|
81
|
-
"n-pref" => "noun, used as a prefix",
|
82
|
-
"n-suf" => "noun, used as a suffix",
|
83
|
-
"n-t" => "noun (temporal) (jisoumeishi)",
|
84
|
-
"nab" => "Nagano-ben",
|
85
|
-
"num" => "numeric",
|
86
|
-
"oK" => "word containing out-dated kanji",
|
87
|
-
"obs" => "obsolete term",
|
88
|
-
"obsc" => "obscure term",
|
89
|
-
"oik" => "old or irregular kana form",
|
90
|
-
"ok" => "out-dated or obsolete kana usage",
|
91
|
-
"on-mim" => "onomatopoeic or mimetic word",
|
92
|
-
"osb" => "Osaka-ben",
|
93
|
-
"physics" => "physics terminology",
|
94
|
-
"pn" => "pronoun",
|
95
|
-
"poet" => "poetical term",
|
96
|
-
"pol" => "polite (teineigo) language",
|
97
|
-
"pref" => "prefix",
|
98
|
-
"proverb" => "proverb",
|
99
|
-
"prt" => "particle",
|
100
|
-
"quote" => "quotation",
|
101
|
-
"rare" => "rare",
|
102
|
-
"rkb" => "Ryuukyuu-ben",
|
103
|
-
"sens" => "sensitive",
|
104
|
-
"shogi" => "shogi term",
|
105
|
-
"sl" => "slang",
|
106
|
-
"sports" => "sports term",
|
107
|
-
"suf" => "suffix",
|
108
|
-
"sumo" => "sumo term",
|
109
|
-
"thb" => "Touhoku-ben",
|
110
|
-
"tsb" => "Tosa-ben",
|
111
|
-
"tsug" => "Tsugaru-ben",
|
112
|
-
"uK" => "word usually written using kanji alone",
|
113
|
-
"uk" => "word usually written using kana alone",
|
114
|
-
"unc" => "unclassified",
|
115
|
-
"v-unspec" => "verb unspecified",
|
116
|
-
"v1" => "Ichidan verb",
|
117
|
-
"v1-s" => "Ichidan verb - kureru special class",
|
118
|
-
"v2a-s" => "Nidan verb with 'u' ending (archaic)",
|
119
|
-
"v2b-k" => "Nidan verb (upper class) with `bu' ending (archaic)",
|
120
|
-
"v2b-s" => "Nidan verb (lower class) with `bu' ending (archaic)",
|
121
|
-
"v2d-k" => "Nidan verb (upper class) with `dzu' ending (archaic)",
|
122
|
-
"v2d-s" => "Nidan verb (lower class) with `dzu' ending (archaic)",
|
123
|
-
"v2g-k" => "Nidan verb (upper class) with `gu' ending (archaic)",
|
124
|
-
"v2g-s" => "Nidan verb (lower class) with `gu' ending (archaic)",
|
125
|
-
"v2h-k" => "Nidan verb (upper class) with `hu/fu' ending (archaic)",
|
126
|
-
"v2h-s" => "Nidan verb (lower class) with `hu/fu' ending (archaic)",
|
127
|
-
"v2k-k" => "Nidan verb (upper class) with `ku' ending (archaic)",
|
128
|
-
"v2k-s" => "Nidan verb (lower class) with `ku' ending (archaic)",
|
129
|
-
"v2m-k" => "Nidan verb (upper class) with `mu' ending (archaic)",
|
130
|
-
"v2m-s" => "Nidan verb (lower class) with `mu' ending (archaic)",
|
131
|
-
"v2n-s" => "Nidan verb (lower class) with `nu' ending (archaic)",
|
132
|
-
"v2r-k" => "Nidan verb (upper class) with `ru' ending (archaic)",
|
133
|
-
"v2r-s" => "Nidan verb (lower class) with `ru' ending (archaic)",
|
134
|
-
"v2s-s" => "Nidan verb (lower class) with `su' ending (archaic)",
|
135
|
-
"v2t-k" => "Nidan verb (upper class) with `tsu' ending (archaic)",
|
136
|
-
"v2t-s" => "Nidan verb (lower class) with `tsu' ending (archaic)",
|
137
|
-
"v2w-s" => "Nidan verb (lower class) with `u' ending and `we' conjugation (archaic)",
|
138
|
-
"v2y-k" => "Nidan verb (upper class) with `yu' ending (archaic)",
|
139
|
-
"v2y-s" => "Nidan verb (lower class) with `yu' ending (archaic)",
|
140
|
-
"v2z-s" => "Nidan verb (lower class) with `zu' ending (archaic)",
|
141
|
-
"v4b" => "Yodan verb with `bu' ending (archaic)",
|
142
|
-
"v4g" => "Yodan verb with `gu' ending (archaic)",
|
143
|
-
"v4h" => "Yodan verb with `hu/fu' ending (archaic)",
|
144
|
-
"v4k" => "Yodan verb with `ku' ending (archaic)",
|
145
|
-
"v4m" => "Yodan verb with `mu' ending (archaic)",
|
146
|
-
"v4n" => "Yodan verb with `nu' ending (archaic)",
|
147
|
-
"v4r" => "Yodan verb with `ru' ending (archaic)",
|
148
|
-
"v4s" => "Yodan verb with `su' ending (archaic)",
|
149
|
-
"v4t" => "Yodan verb with `tsu' ending (archaic)",
|
150
|
-
"v5aru" => "Godan verb - -aru special class",
|
151
|
-
"v5b" => "Godan verb with `bu' ending",
|
152
|
-
"v5g" => "Godan verb with `gu' ending",
|
153
|
-
"v5k" => "Godan verb with `ku' ending",
|
154
|
-
"v5k-s" => "Godan verb - Iku/Yuku special class",
|
155
|
-
"v5m" => "Godan verb with `mu' ending",
|
156
|
-
"v5n" => "Godan verb with `nu' ending",
|
157
|
-
"v5r" => "Godan verb with `ru' ending",
|
158
|
-
"v5r-i" => "Godan verb with `ru' ending (irregular verb)",
|
159
|
-
"v5s" => "Godan verb with `su' ending",
|
160
|
-
"v5t" => "Godan verb with `tsu' ending",
|
161
|
-
"v5u" => "Godan verb with `u' ending",
|
162
|
-
"v5u-s" => "Godan verb with `u' ending (special class)",
|
163
|
-
"v5uru" => "Godan verb - Uru old class verb (old form of Eru)",
|
164
|
-
"vi" => "intransitive verb",
|
165
|
-
"vk" => "Kuru verb - special class",
|
166
|
-
"vn" => "irregular nu verb",
|
167
|
-
"vr" => "irregular ru verb, plain form ends with -ri",
|
168
|
-
"vs" => "noun or participle which takes the aux. verb suru",
|
169
|
-
"vs-c" => "su verb - precursor to the modern suru",
|
170
|
-
"vs-i" => "suru verb - included",
|
171
|
-
"vs-s" => "suru verb - special class",
|
172
|
-
"vt" => "transitive verb",
|
173
|
-
"vulg" => "vulgar expression or word",
|
174
|
-
"vz" => "Ichidan verb - zuru verb (alternative form of -jiru verbs)",
|
175
|
-
"yoji" => "yojijukugo",
|
176
|
-
"zool" => "zoology term"
|
177
|
-
}
|
178
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
require_relative "jmdict_doc"
|
3
|
-
|
4
|
-
module Eiwa
|
5
|
-
class ParsesJmdictFile
|
6
|
-
def call(filename, each_entry_block)
|
7
|
-
if each_entry_block.nil?
|
8
|
-
entries = []
|
9
|
-
each_entry_block ||= ->(e) { entries << e }
|
10
|
-
end
|
11
|
-
|
12
|
-
JmdictDoc.new(each_entry_block).tap do |doc|
|
13
|
-
Nokogiri::XML::SAX::Parser.new(doc).parse_file(filename) do |ctx|
|
14
|
-
ctx.recovery = true
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
entries
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|