eiwa 0.0.2 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -8
- data/.standard.yml +1 -0
- data/Gemfile +5 -1
- data/Gemfile.lock +61 -35
- data/README.md +22 -13
- data/eiwa.gemspec +1 -6
- data/lib/eiwa/jmdict/doc.rb +85 -0
- data/lib/eiwa/jmdict/entities.rb +180 -0
- data/lib/eiwa/kanjidic/doc.rb +43 -0
- data/lib/eiwa/parses_file.rb +35 -0
- data/lib/eiwa/tag/antonym.rb +2 -2
- data/lib/eiwa/tag/any.rb +1 -1
- data/lib/eiwa/tag/bag.rb +21 -0
- data/lib/eiwa/tag/character.rb +24 -0
- data/lib/eiwa/tag/cross_reference.rb +3 -3
- data/lib/eiwa/tag/definition.rb +2 -2
- data/lib/eiwa/tag/entity.rb +2 -4
- data/lib/eiwa/tag/entry.rb +0 -2
- data/lib/eiwa/tag/list.rb +18 -0
- data/lib/eiwa/tag/meaning.rb +0 -2
- data/lib/eiwa/tag/other.rb +5 -3
- data/lib/eiwa/tag/reading.rb +0 -2
- data/lib/eiwa/tag/reading_meaning.rb +11 -0
- data/lib/eiwa/tag/source_language.rb +2 -2
- data/lib/eiwa/tag/spelling.rb +0 -2
- data/lib/eiwa/version.rb +1 -1
- data/lib/eiwa.rb +19 -7
- metadata +19 -83
- data/lib/eiwa/jmdict_doc.rb +0 -93
- data/lib/eiwa/jmdict_entities.rb +0 -178
- data/lib/eiwa/parses_jmdict_file.rb +0 -21
data/lib/eiwa/tag/cross_reference.rb
CHANGED
@@ -12,7 +12,7 @@ module Eiwa
|
|
12
12
|
def end_self
|
13
13
|
parts = @characters.split("・")
|
14
14
|
@text = parts.first
|
15
|
-
@reading = parts[1
|
15
|
+
@reading = parts[1..].find { |part| /[^0-9]/.match(part) }
|
16
16
|
@sense_ordinal = parts.find { |part| /^[0-9]+$/.match(part) }&.to_i
|
17
17
|
end
|
18
18
|
|
@@ -21,10 +21,10 @@ module Eiwa
|
|
21
21
|
@reading == other.reading &&
|
22
22
|
@sense_ordinal == other.sense_ordinal
|
23
23
|
end
|
24
|
-
|
24
|
+
alias_method :==, :eql?
|
25
25
|
|
26
26
|
def hash
|
27
|
-
@text
|
27
|
+
[@text, @reading, @sense_ordinal].hash
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
data/lib/eiwa/tag/definition.rb
CHANGED
@@ -35,10 +35,10 @@ module Eiwa
|
|
35
35
|
@gender == other.gender &&
|
36
36
|
@type == other.type
|
37
37
|
end
|
38
|
-
|
38
|
+
alias_method :==, :eql?
|
39
39
|
|
40
40
|
def hash
|
41
|
-
@text
|
41
|
+
[@text, @language, @gender, @type].hash
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
data/lib/eiwa/tag/entity.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require_relative "any"
|
2
|
-
|
3
1
|
module Eiwa
|
4
2
|
module Tag
|
5
3
|
class Entity < Any
|
@@ -19,10 +17,10 @@ module Eiwa
|
|
19
17
|
@code == other.code &&
|
20
18
|
@text == other.text
|
21
19
|
end
|
22
|
-
|
20
|
+
alias_method :==, :eql?
|
23
21
|
|
24
22
|
def hash
|
25
|
-
@code
|
23
|
+
[@code, @text].hash
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
data/lib/eiwa/tag/entry.rb
CHANGED
data/lib/eiwa/tag/list.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Eiwa
|
2
|
+
module Tag
|
3
|
+
# For containers of lists or repeated elements
|
4
|
+
class List < Any
|
5
|
+
Item = Struct.new(:name, :attrs, :text, keyword_init: true)
|
6
|
+
|
7
|
+
attr_reader :items
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@items = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def end_child(child)
|
14
|
+
@items << Item.new(name: child.tag_name, attrs: child.attrs, text: child.text)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/eiwa/tag/meaning.rb
CHANGED
data/lib/eiwa/tag/other.rb
CHANGED
data/lib/eiwa/tag/reading.rb
CHANGED
data/lib/eiwa/tag/source_language.rb
CHANGED
@@ -23,10 +23,10 @@ module Eiwa
|
|
23
23
|
@wasei == other.wasei &&
|
24
24
|
@type == other.type
|
25
25
|
end
|
26
|
-
|
26
|
+
alias_method :==, :eql?
|
27
27
|
|
28
28
|
def hash
|
29
|
-
@text
|
29
|
+
[@text, @language, @wasei, @type].hash
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
data/lib/eiwa/tag/spelling.rb
CHANGED
data/lib/eiwa/version.rb
CHANGED
data/lib/eiwa.rb
CHANGED
@@ -1,15 +1,27 @@
|
|
1
1
|
require "eiwa/version"
|
2
|
-
|
2
|
+
|
3
|
+
require "eiwa/tag/any"
|
4
|
+
require "eiwa/tag/character"
|
5
|
+
require "eiwa/tag/bag"
|
6
|
+
require "eiwa/tag/list"
|
7
|
+
require "eiwa/tag/reading_meaning"
|
8
|
+
require "eiwa/tag/entry"
|
9
|
+
require "eiwa/tag/spelling"
|
10
|
+
require "eiwa/tag/reading"
|
11
|
+
require "eiwa/tag/meaning"
|
12
|
+
require "eiwa/tag/entity"
|
13
|
+
require "eiwa/tag/cross_reference"
|
14
|
+
require "eiwa/tag/antonym"
|
15
|
+
require "eiwa/tag/source_language"
|
16
|
+
require "eiwa/tag/definition"
|
17
|
+
require "eiwa/tag/other"
|
18
|
+
|
19
|
+
require "eiwa/parses_file"
|
3
20
|
|
4
21
|
module Eiwa
|
5
22
|
class Error < StandardError; end
|
6
23
|
|
7
24
|
def self.parse_file(filename, type: :jmdict_e, &each_entry_block)
|
8
|
-
case type
|
9
|
-
when :jmdict_e
|
10
|
-
ParsesJmdictFile.new.call(filename, each_entry_block)
|
11
|
-
else
|
12
|
-
raise Eiwa::Error.new("Unknown file type: #{type}")
|
13
|
-
end
|
25
|
+
ParsesFile.new.call(filename, type, each_entry_block)
|
14
26
|
end
|
15
27
|
end
|
metadata
CHANGED
@@ -1,100 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eiwa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Searls
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.17'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.17'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '13.0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '13.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: minitest
|
57
15
|
requirement: !ruby/object:Gem::Requirement
|
58
16
|
requirements:
|
59
17
|
- - "~>"
|
60
18
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
62
|
-
type: :
|
19
|
+
version: 1.15.5
|
20
|
+
type: :runtime
|
63
21
|
prerelease: false
|
64
22
|
version_requirements: !ruby/object:Gem::Requirement
|
65
23
|
requirements:
|
66
24
|
- - "~>"
|
67
25
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
69
|
-
|
70
|
-
name: standard
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: pry
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
description:
|
26
|
+
version: 1.15.5
|
27
|
+
description:
|
98
28
|
email:
|
99
29
|
- searls@gmail.com
|
100
30
|
executables: []
|
@@ -103,6 +33,7 @@ extra_rdoc_files: []
|
|
103
33
|
files:
|
104
34
|
- ".github/workflows/ruby.yml"
|
105
35
|
- ".gitignore"
|
36
|
+
- ".standard.yml"
|
106
37
|
- Gemfile
|
107
38
|
- Gemfile.lock
|
108
39
|
- LICENSE.txt
|
@@ -112,18 +43,23 @@ files:
|
|
112
43
|
- bin/setup
|
113
44
|
- eiwa.gemspec
|
114
45
|
- lib/eiwa.rb
|
115
|
-
- lib/eiwa/
|
116
|
-
- lib/eiwa/
|
117
|
-
- lib/eiwa/
|
46
|
+
- lib/eiwa/jmdict/doc.rb
|
47
|
+
- lib/eiwa/jmdict/entities.rb
|
48
|
+
- lib/eiwa/kanjidic/doc.rb
|
49
|
+
- lib/eiwa/parses_file.rb
|
118
50
|
- lib/eiwa/tag/antonym.rb
|
119
51
|
- lib/eiwa/tag/any.rb
|
52
|
+
- lib/eiwa/tag/bag.rb
|
53
|
+
- lib/eiwa/tag/character.rb
|
120
54
|
- lib/eiwa/tag/cross_reference.rb
|
121
55
|
- lib/eiwa/tag/definition.rb
|
122
56
|
- lib/eiwa/tag/entity.rb
|
123
57
|
- lib/eiwa/tag/entry.rb
|
58
|
+
- lib/eiwa/tag/list.rb
|
124
59
|
- lib/eiwa/tag/meaning.rb
|
125
60
|
- lib/eiwa/tag/other.rb
|
126
61
|
- lib/eiwa/tag/reading.rb
|
62
|
+
- lib/eiwa/tag/reading_meaning.rb
|
127
63
|
- lib/eiwa/tag/source_language.rb
|
128
64
|
- lib/eiwa/tag/spelling.rb
|
129
65
|
- lib/eiwa/version.rb
|
@@ -133,7 +69,7 @@ homepage: https://github.com/searls/eiwa
|
|
133
69
|
licenses:
|
134
70
|
- MIT
|
135
71
|
metadata: {}
|
136
|
-
post_install_message:
|
72
|
+
post_install_message:
|
137
73
|
rdoc_options: []
|
138
74
|
require_paths:
|
139
75
|
- lib
|
@@ -148,8 +84,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
84
|
- !ruby/object:Gem::Version
|
149
85
|
version: '0'
|
150
86
|
requirements: []
|
151
|
-
rubygems_version: 3.
|
152
|
-
signing_key:
|
87
|
+
rubygems_version: 3.5.4
|
88
|
+
signing_key:
|
153
89
|
specification_version: 4
|
154
90
|
summary: Parses the JMDict Japanese-English dictionary
|
155
91
|
test_files: []
|
data/lib/eiwa/jmdict_doc.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require_relative "tag/entry"
|
2
|
-
require_relative "tag/spelling"
|
3
|
-
require_relative "tag/reading"
|
4
|
-
require_relative "tag/meaning"
|
5
|
-
require_relative "tag/entity"
|
6
|
-
require_relative "tag/cross_reference"
|
7
|
-
require_relative "tag/antonym"
|
8
|
-
require_relative "tag/source_language"
|
9
|
-
require_relative "tag/definition"
|
10
|
-
require_relative "tag/other"
|
11
|
-
|
12
|
-
require_relative "jmdict_entities"
|
13
|
-
|
14
|
-
module Eiwa
|
15
|
-
TAGS = {
|
16
|
-
"entry" => Tag::Entry,
|
17
|
-
"k_ele" => Tag::Spelling,
|
18
|
-
"r_ele" => Tag::Reading,
|
19
|
-
"sense" => Tag::Meaning,
|
20
|
-
"pos" => Tag::Entity,
|
21
|
-
"misc" => Tag::Entity,
|
22
|
-
"dial" => Tag::Entity,
|
23
|
-
"field" => Tag::Entity,
|
24
|
-
"ke_inf" => Tag::Entity,
|
25
|
-
"re_inf" => Tag::Entity,
|
26
|
-
"xref" => Tag::CrossReference,
|
27
|
-
"ant" => Tag::Antonym,
|
28
|
-
"lsource" => Tag::SourceLanguage,
|
29
|
-
"gloss" => Tag::Definition
|
30
|
-
}
|
31
|
-
|
32
|
-
class JmdictDoc < Nokogiri::XML::SAX::Document
|
33
|
-
def initialize(each_entry_block)
|
34
|
-
@each_entry_block = each_entry_block
|
35
|
-
end
|
36
|
-
|
37
|
-
def start_document
|
38
|
-
end
|
39
|
-
|
40
|
-
def end_document
|
41
|
-
end
|
42
|
-
|
43
|
-
def start_element(name, attrs)
|
44
|
-
parent = @current
|
45
|
-
@current = (TAGS[name] || Tag::Other).new
|
46
|
-
@current.start(name, attrs, parent)
|
47
|
-
end
|
48
|
-
|
49
|
-
def end_element(name)
|
50
|
-
raise Eiwa::Error.new("Parsing error. Expected <#{@current.tag_name}> to close before <#{name}>") if @current.tag_name != name
|
51
|
-
ending = @current
|
52
|
-
ending.end_self
|
53
|
-
if ending.is_a?(Tag::Entry)
|
54
|
-
@each_entry_block&.call(ending)
|
55
|
-
end
|
56
|
-
|
57
|
-
@current = ending.parent
|
58
|
-
@current&.end_child(ending)
|
59
|
-
end
|
60
|
-
|
61
|
-
def characters(s)
|
62
|
-
@current.add_characters(s)
|
63
|
-
end
|
64
|
-
|
65
|
-
# def comment string
|
66
|
-
# puts "comment #{string}"
|
67
|
-
# end
|
68
|
-
|
69
|
-
# def warning string
|
70
|
-
# puts "warning #{string}"
|
71
|
-
# end
|
72
|
-
|
73
|
-
def error(msg)
|
74
|
-
if (matches = msg.match(/Entity '([\S]+)' not defined/))
|
75
|
-
# See: http://github.com/sparklemotion/nokogiri/issues/1926
|
76
|
-
code = matches[1]
|
77
|
-
@current.set_entity(code, JMDICT_ENTITIES[code])
|
78
|
-
elsif msg == "Detected an entity reference loop\n"
|
79
|
-
# Do nothing and hope this does not matter.
|
80
|
-
else
|
81
|
-
raise Eiwa::Error.new("Parsing error: #{msg}")
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
# def cdata_block string
|
86
|
-
# puts "cdata_block #{string}"
|
87
|
-
# end
|
88
|
-
|
89
|
-
# def processing_instruction name, content
|
90
|
-
# puts "processing_instruction #{name}, #{content}"
|
91
|
-
# end
|
92
|
-
end
|
93
|
-
end
|
data/lib/eiwa/jmdict_entities.rb
DELETED
@@ -1,178 +0,0 @@
|
|
1
|
-
module Eiwa
|
2
|
-
JMDICT_ENTITIES = {
|
3
|
-
"Buddh" => "Buddhist term",
|
4
|
-
"MA" => "martial arts term",
|
5
|
-
"Shinto" => "Shinto term",
|
6
|
-
"X" => "rude or X-rated term (not displayed in educational software)",
|
7
|
-
"abbr" => "abbreviation",
|
8
|
-
"adj-f" => "noun or verb acting prenominally",
|
9
|
-
"adj-i" => "adjective (keiyoushi)",
|
10
|
-
"adj-ix" => "adjective (keiyoushi) - yoi/ii class",
|
11
|
-
"adj-kari" => "`kari' adjective (archaic)",
|
12
|
-
"adj-ku" => "`ku' adjective (archaic)",
|
13
|
-
"adj-na" => "adjectival nouns or quasi-adjectives (keiyodoshi)",
|
14
|
-
"adj-nari" => "archaic/formal form of na-adjective",
|
15
|
-
"adj-no" => "nouns which may take the genitive case particle `no'",
|
16
|
-
"adj-pn" => "pre-noun adjectival (rentaishi)",
|
17
|
-
"adj-shiku" => "`shiku' adjective (archaic)",
|
18
|
-
"adj-t" => "`taru' adjective",
|
19
|
-
"adv" => "adverb (fukushi)",
|
20
|
-
"adv-to" => "adverb taking the `to' particle",
|
21
|
-
"anat" => "anatomical term",
|
22
|
-
"arch" => "archaism",
|
23
|
-
"archit" => "architecture term",
|
24
|
-
"astron" => "astronomy, etc. term",
|
25
|
-
"ateji" => "ateji (phonetic) reading",
|
26
|
-
"aux" => "auxiliary",
|
27
|
-
"aux-adj" => "auxiliary adjective",
|
28
|
-
"aux-v" => "auxiliary verb",
|
29
|
-
"baseb" => "baseball term",
|
30
|
-
"biol" => "biology term",
|
31
|
-
"bot" => "botany term",
|
32
|
-
"bus" => "business term",
|
33
|
-
"chem" => "chemistry term",
|
34
|
-
"chn" => "children's language",
|
35
|
-
"col" => "colloquialism",
|
36
|
-
"comp" => "computer terminology",
|
37
|
-
"conj" => "conjunction",
|
38
|
-
"cop" => "copula",
|
39
|
-
"cop-da" => "copula",
|
40
|
-
"ctr" => "counter",
|
41
|
-
"derog" => "derogatory",
|
42
|
-
"eK" => "exclusively kanji",
|
43
|
-
"econ" => "economics term",
|
44
|
-
"ek" => "exclusively kana",
|
45
|
-
"engr" => "engineering term",
|
46
|
-
"exp" => "expressions (phrases, clauses, etc.)",
|
47
|
-
"fam" => "familiar language",
|
48
|
-
"fem" => "female term or language",
|
49
|
-
"finc" => "finance term",
|
50
|
-
"food" => "food term",
|
51
|
-
"geol" => "geology, etc. term",
|
52
|
-
"geom" => "geometry term",
|
53
|
-
"gikun" => "gikun (meaning as reading) or jukujikun (special kanji reading)",
|
54
|
-
"hob" => "Hokkaido-ben",
|
55
|
-
"hon" => "honorific or respectful (sonkeigo) language",
|
56
|
-
"hum" => "humble (kenjougo) language",
|
57
|
-
"iK" => "word containing irregular kanji usage",
|
58
|
-
"id" => "idiomatic expression",
|
59
|
-
"ik" => "word containing irregular kana usage",
|
60
|
-
"int" => "interjection (kandoushi)",
|
61
|
-
"io" => "irregular okurigana usage",
|
62
|
-
"iv" => "irregular verb",
|
63
|
-
"joc" => "jocular, humorous term",
|
64
|
-
"ksb" => "Kansai-ben",
|
65
|
-
"ktb" => "Kantou-ben",
|
66
|
-
"kyb" => "Kyoto-ben",
|
67
|
-
"kyu" => "Kyuushuu-ben",
|
68
|
-
"law" => "law, etc. term",
|
69
|
-
"ling" => "linguistics terminology",
|
70
|
-
"m-sl" => "manga slang",
|
71
|
-
"mahj" => "mahjong term",
|
72
|
-
"male" => "male term or language",
|
73
|
-
"male-sl" => "male slang",
|
74
|
-
"math" => "mathematics",
|
75
|
-
"med" => "medicine, etc. term",
|
76
|
-
"mil" => "military",
|
77
|
-
"music" => "music term",
|
78
|
-
"n" => "noun (common) (futsuumeishi)",
|
79
|
-
"n-adv" => "adverbial noun (fukushitekimeishi)",
|
80
|
-
"n-pr" => "proper noun",
|
81
|
-
"n-pref" => "noun, used as a prefix",
|
82
|
-
"n-suf" => "noun, used as a suffix",
|
83
|
-
"n-t" => "noun (temporal) (jisoumeishi)",
|
84
|
-
"nab" => "Nagano-ben",
|
85
|
-
"num" => "numeric",
|
86
|
-
"oK" => "word containing out-dated kanji",
|
87
|
-
"obs" => "obsolete term",
|
88
|
-
"obsc" => "obscure term",
|
89
|
-
"oik" => "old or irregular kana form",
|
90
|
-
"ok" => "out-dated or obsolete kana usage",
|
91
|
-
"on-mim" => "onomatopoeic or mimetic word",
|
92
|
-
"osb" => "Osaka-ben",
|
93
|
-
"physics" => "physics terminology",
|
94
|
-
"pn" => "pronoun",
|
95
|
-
"poet" => "poetical term",
|
96
|
-
"pol" => "polite (teineigo) language",
|
97
|
-
"pref" => "prefix",
|
98
|
-
"proverb" => "proverb",
|
99
|
-
"prt" => "particle",
|
100
|
-
"quote" => "quotation",
|
101
|
-
"rare" => "rare",
|
102
|
-
"rkb" => "Ryuukyuu-ben",
|
103
|
-
"sens" => "sensitive",
|
104
|
-
"shogi" => "shogi term",
|
105
|
-
"sl" => "slang",
|
106
|
-
"sports" => "sports term",
|
107
|
-
"suf" => "suffix",
|
108
|
-
"sumo" => "sumo term",
|
109
|
-
"thb" => "Touhoku-ben",
|
110
|
-
"tsb" => "Tosa-ben",
|
111
|
-
"tsug" => "Tsugaru-ben",
|
112
|
-
"uK" => "word usually written using kanji alone",
|
113
|
-
"uk" => "word usually written using kana alone",
|
114
|
-
"unc" => "unclassified",
|
115
|
-
"v-unspec" => "verb unspecified",
|
116
|
-
"v1" => "Ichidan verb",
|
117
|
-
"v1-s" => "Ichidan verb - kureru special class",
|
118
|
-
"v2a-s" => "Nidan verb with 'u' ending (archaic)",
|
119
|
-
"v2b-k" => "Nidan verb (upper class) with `bu' ending (archaic)",
|
120
|
-
"v2b-s" => "Nidan verb (lower class) with `bu' ending (archaic)",
|
121
|
-
"v2d-k" => "Nidan verb (upper class) with `dzu' ending (archaic)",
|
122
|
-
"v2d-s" => "Nidan verb (lower class) with `dzu' ending (archaic)",
|
123
|
-
"v2g-k" => "Nidan verb (upper class) with `gu' ending (archaic)",
|
124
|
-
"v2g-s" => "Nidan verb (lower class) with `gu' ending (archaic)",
|
125
|
-
"v2h-k" => "Nidan verb (upper class) with `hu/fu' ending (archaic)",
|
126
|
-
"v2h-s" => "Nidan verb (lower class) with `hu/fu' ending (archaic)",
|
127
|
-
"v2k-k" => "Nidan verb (upper class) with `ku' ending (archaic)",
|
128
|
-
"v2k-s" => "Nidan verb (lower class) with `ku' ending (archaic)",
|
129
|
-
"v2m-k" => "Nidan verb (upper class) with `mu' ending (archaic)",
|
130
|
-
"v2m-s" => "Nidan verb (lower class) with `mu' ending (archaic)",
|
131
|
-
"v2n-s" => "Nidan verb (lower class) with `nu' ending (archaic)",
|
132
|
-
"v2r-k" => "Nidan verb (upper class) with `ru' ending (archaic)",
|
133
|
-
"v2r-s" => "Nidan verb (lower class) with `ru' ending (archaic)",
|
134
|
-
"v2s-s" => "Nidan verb (lower class) with `su' ending (archaic)",
|
135
|
-
"v2t-k" => "Nidan verb (upper class) with `tsu' ending (archaic)",
|
136
|
-
"v2t-s" => "Nidan verb (lower class) with `tsu' ending (archaic)",
|
137
|
-
"v2w-s" => "Nidan verb (lower class) with `u' ending and `we' conjugation (archaic)",
|
138
|
-
"v2y-k" => "Nidan verb (upper class) with `yu' ending (archaic)",
|
139
|
-
"v2y-s" => "Nidan verb (lower class) with `yu' ending (archaic)",
|
140
|
-
"v2z-s" => "Nidan verb (lower class) with `zu' ending (archaic)",
|
141
|
-
"v4b" => "Yodan verb with `bu' ending (archaic)",
|
142
|
-
"v4g" => "Yodan verb with `gu' ending (archaic)",
|
143
|
-
"v4h" => "Yodan verb with `hu/fu' ending (archaic)",
|
144
|
-
"v4k" => "Yodan verb with `ku' ending (archaic)",
|
145
|
-
"v4m" => "Yodan verb with `mu' ending (archaic)",
|
146
|
-
"v4n" => "Yodan verb with `nu' ending (archaic)",
|
147
|
-
"v4r" => "Yodan verb with `ru' ending (archaic)",
|
148
|
-
"v4s" => "Yodan verb with `su' ending (archaic)",
|
149
|
-
"v4t" => "Yodan verb with `tsu' ending (archaic)",
|
150
|
-
"v5aru" => "Godan verb - -aru special class",
|
151
|
-
"v5b" => "Godan verb with `bu' ending",
|
152
|
-
"v5g" => "Godan verb with `gu' ending",
|
153
|
-
"v5k" => "Godan verb with `ku' ending",
|
154
|
-
"v5k-s" => "Godan verb - Iku/Yuku special class",
|
155
|
-
"v5m" => "Godan verb with `mu' ending",
|
156
|
-
"v5n" => "Godan verb with `nu' ending",
|
157
|
-
"v5r" => "Godan verb with `ru' ending",
|
158
|
-
"v5r-i" => "Godan verb with `ru' ending (irregular verb)",
|
159
|
-
"v5s" => "Godan verb with `su' ending",
|
160
|
-
"v5t" => "Godan verb with `tsu' ending",
|
161
|
-
"v5u" => "Godan verb with `u' ending",
|
162
|
-
"v5u-s" => "Godan verb with `u' ending (special class)",
|
163
|
-
"v5uru" => "Godan verb - Uru old class verb (old form of Eru)",
|
164
|
-
"vi" => "intransitive verb",
|
165
|
-
"vk" => "Kuru verb - special class",
|
166
|
-
"vn" => "irregular nu verb",
|
167
|
-
"vr" => "irregular ru verb, plain form ends with -ri",
|
168
|
-
"vs" => "noun or participle which takes the aux. verb suru",
|
169
|
-
"vs-c" => "su verb - precursor to the modern suru",
|
170
|
-
"vs-i" => "suru verb - included",
|
171
|
-
"vs-s" => "suru verb - special class",
|
172
|
-
"vt" => "transitive verb",
|
173
|
-
"vulg" => "vulgar expression or word",
|
174
|
-
"vz" => "Ichidan verb - zuru verb (alternative form of -jiru verbs)",
|
175
|
-
"yoji" => "yojijukugo",
|
176
|
-
"zool" => "zoology term"
|
177
|
-
}
|
178
|
-
end
|
data/lib/eiwa/parses_jmdict_file.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
require_relative "jmdict_doc"
|
3
|
-
|
4
|
-
module Eiwa
|
5
|
-
class ParsesJmdictFile
|
6
|
-
def call(filename, each_entry_block)
|
7
|
-
if each_entry_block.nil?
|
8
|
-
entries = []
|
9
|
-
each_entry_block ||= ->(e) { entries << e }
|
10
|
-
end
|
11
|
-
|
12
|
-
JmdictDoc.new(each_entry_block).tap do |doc|
|
13
|
-
Nokogiri::XML::SAX::Parser.new(doc).parse_file(filename) do |ctx|
|
14
|
-
ctx.recovery = true
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
entries
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|