daidai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ id kw descr ents
2
+ 1 adj-i adjective (keiyoushi)
3
+ 2 adj-na adjectival nouns or quasi-adjectives (keiyodoshi)
4
+ 3 adj-no nouns which may take the genitive case particle 'no'
5
+ 4 adj-pn pre-noun adjectival (rentaishi)
6
+ 5 adj-t 'taru' adjective
7
+ 6 adv adverb (fukushi)
8
+ 7 adj-ix adjective (keiyoushi) - yoi/ii class
9
+ 8 adv-to adverb taking the 'to' particle
10
+ 9 aux auxiliary
11
+ 10 aux-adj auxiliary adjective
12
+ 11 aux-v auxiliary verb
13
+ 12 conj conjunction
14
+ 13 exp expressions (phrases, clauses, etc.)
15
+ 14 int interjection (kandoushi)
16
+ 15 cop copula
17
+ 17 n noun (common) (futsuumeishi)
18
+ 18 n-adv adverbial noun (fukushitekimeishi)
19
+ 19 n-suf noun, used as a suffix
20
+ 20 n-pref noun, used as a prefix
21
+ 21 n-t noun (temporal) (jisoumeishi)
22
+ 24 num numeric
23
+ 25 pref prefix
24
+ 26 prt particle
25
+ 27 suf suffix
26
+ 28 v1 Ichidan verb
27
+ 29 v1-s Ichidan verb - kureru special class
28
+ 30 v5aru Godan verb - -aru special class
29
+ 31 v5b Godan verb with 'bu' ending
30
+ 32 v5g Godan verb with 'gu' ending
31
+ 33 v5k Godan verb with 'ku' ending
32
+ 34 v5k-s Godan verb - Iku/Yuku special class
33
+ 35 v5m Godan verb with 'mu' ending
34
+ 36 v5n Godan verb with 'nu' ending
35
+ 37 v5r Godan verb with 'ru' ending
36
+ 38 v5r-i Godan verb with 'ru' ending (irregular verb)
37
+ 39 v5s Godan verb with 'su' ending
38
+ 40 v5t Godan verb with 'tsu' ending
39
+ 41 v5u Godan verb with 'u' ending
40
+ 42 v5u-s Godan verb with 'u' ending (special class)
41
+ 43 v5uru Godan verb - Uru old class verb (old form of Eru)
42
+ 44 vi intransitive verb
43
+ 45 vk Kuru verb - special class
44
+ 46 vs noun or participle which takes the aux. verb suru
45
+ 47 vs-s suru verb - special class
46
+ 48 vs-i suru verb - included
47
+ 49 vz Ichidan verb - zuru verb (alternative form of -jiru verbs)
48
+ 50 vt transitive verb
49
+ 51 ctr counter
50
+ 52 vn irregular nu verb
51
+ 53 v4r Yodan verb with 'ru' ending (archaic)
52
+ 56 adj-f noun or verb acting prenominally
53
+ 58 vr irregular ru verb, plain form ends with -ri
54
+ 59 v2a-s Nidan verb with 'u' ending (archaic)
55
+ 60 v4h Yodan verb with 'hu/fu' ending (archaic)
56
+ 61 pn pronoun
57
+ 62 vs-c su verb - precursor to the modern suru
58
+ 63 adj-kari 'kari' adjective (archaic)
59
+ 64 adj-ku 'ku' adjective (archaic)
60
+ 65 adj-shiku 'shiku' adjective (archaic)
61
+ 66 adj-nari archaic/formal form of na-adjective
62
+ 67 n-pr proper noun
63
+ 68 v-unspec verb unspecified
64
+ 69 v4k Yodan verb with 'ku' ending (archaic)
65
+ 70 v4g Yodan verb with 'gu' ending (archaic)
66
+ 71 v4s Yodan verb with 'su' ending (archaic)
67
+ 72 v4t Yodan verb with 'tsu' ending (archaic)
68
+ 73 v4n Yodan verb with 'nu' ending (archaic)
69
+ 74 v4b Yodan verb with 'bu' ending (archaic)
70
+ 75 v4m Yodan verb with 'mu' ending (archaic)
71
+ 76 v2k-k Nidan verb (upper class) with 'ku' ending (archaic)
72
+ 77 v2g-k Nidan verb (upper class) with 'gu' ending (archaic)
73
+ 78 v2t-k Nidan verb (upper class) with 'tsu' ending (archaic)
74
+ 79 v2d-k Nidan verb (upper class) with 'dzu' ending (archaic)
75
+ 80 v2h-k Nidan verb (upper class) with 'hu/fu' ending (archaic)
76
+ 81 v2b-k Nidan verb (upper class) with 'bu' ending (archaic)
77
+ 82 v2m-k Nidan verb (upper class) with 'mu' ending (archaic)
78
+ 83 v2y-k Nidan verb (upper class) with 'yu' ending (archaic)
79
+ 84 v2r-k Nidan verb (upper class) with 'ru' ending (archaic)
80
+ 85 v2k-s Nidan verb (lower class) with 'ku' ending (archaic)
81
+ 86 v2g-s Nidan verb (lower class) with 'gu' ending (archaic)
82
+ 87 v2s-s Nidan verb (lower class) with 'su' ending (archaic)
83
+ 88 v2z-s Nidan verb (lower class) with 'zu' ending (archaic)
84
+ 89 v2t-s Nidan verb (lower class) with 'tsu' ending (archaic)
85
+ 90 v2d-s Nidan verb (lower class) with 'dzu' ending (archaic)
86
+ 91 v2n-s Nidan verb (lower class) with 'nu' ending (archaic)
87
+ 92 v2h-s Nidan verb (lower class) with 'hu/fu' ending (archaic)
88
+ 93 v2b-s Nidan verb (lower class) with 'bu' ending (archaic)
89
+ 94 v2m-s Nidan verb (lower class) with 'mu' ending (archaic)
90
+ 95 v2y-s Nidan verb (lower class) with 'yu' ending (archaic)
91
+ 96 v2r-s Nidan verb (lower class) with 'ru' ending (archaic)
92
+ 97 v2w-s Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic)
93
+ 98 unc unclassified
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Daidai
6
+ # Loads and memoizes the JMdictDB conjugation tables vendored under
7
+ # `resources/`. The files are tab-separated and copied verbatim from jconj
8
+ # (Stuart McGraw / EDRDG) — see NOTICE. Keep them in sync with upstream via
9
+ # `rake daidai:sync`.
10
+ module Tables
11
+ DIR = File.expand_path("resources", __dir__) # absolute path of the `resources` directory beside this file
12
+
13
+ # One okurigana rule: how to turn a dictionary form into one conjugation.
14
+ Okurigana = Struct.new(:stem, :okuri, :euphr, :euphk, keyword_init: true)
15
+
16
+ class << self
17
+ # Memoized map read from conj.csv: conjugation id (Integer) => human name ("Past (~ta)", …).
18
+ def conj
19
+ @conj ||= read("conj.csv").to_h { |r| [ r["id"].to_i, r["name"] ] }
20
+ end
21
+
22
+ # Memoized map read from conjo.csv: [pos_id, conj_id, negative?, polite?, onum] => Okurigana.
23
+ def conjo
24
+ @conjo ||= read("conjo.csv").each_with_object({}) do |r, table|
25
+ key = [ r["pos"].to_i, r["conj"].to_i, r["neg"] == "t", r["fml"] == "t", r["onum"].to_i ] # "t" in neg/fml becomes true
26
+ table[key] = Okurigana.new(
27
+ stem: r["stem"].to_i,
28
+ okuri: r["okuri"].to_s,
29
+ euphr: presence(r["euphr"]),
30
+ euphk: presence(r["euphk"])
31
+ )
32
+ end
33
+ end
34
+
35
+ # Memoized map read from kwpos.csv: JMdict POS keyword ("v5k", "adj-i", …) => conjo pos id (Integer).
36
+ def pos_ids
37
+ @pos_ids ||= read("kwpos.csv").to_h { |r| [ r["kw"], r["id"].to_i ] }
38
+ end
39
+
40
+ def reload! # clears the memoized tables so the next access re-reads the files
41
+ @conj = @conjo = @pos_ids = nil
42
+ end
43
+
44
+ private
45
+
46
+ def read(file, headers: true) # parses the tab-separated file `file` under DIR with CSV
47
+ CSV.read(File.join(DIR, file), col_sep: "\t", headers: headers, quote_char: '"')
48
+ end
49
+
50
+ def presence(value) # nil for nil or empty values, otherwise the value unchanged
51
+ value.nil? || value.empty? ? nil : value
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Daidai
4
+ VERSION = "0.1.0" # gem version string
5
+ end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Daidai
4
+ # The conjugation forms daidai generates, in table order, with display labels.
5
+ # These names replace the numeric conjugation ids of the underlying tables.
6
+ FORMS = { # form name (Symbol) => display label (String)
7
+ non_past: "Non-past",
8
+ past: "Past",
9
+ te: "Te-form",
10
+ provisional: "Provisional (~eba)",
11
+ potential: "Potential",
12
+ passive: "Passive",
13
+ causative: "Causative",
14
+ causative_passive: "Causative-passive",
15
+ volitional: "Volitional",
16
+ imperative: "Imperative",
17
+ conditional: "Conditional (~tara)",
18
+ alternative: "Alternative (~tari)",
19
+ stem: "Continuative (~i)"
20
+ }.freeze
21
+
22
+ # conjo.csv conjugation id (1..13) => form name, in the same order as FORMS.
23
+ FORM_BY_ID = FORMS.keys.each_with_index.to_h { |name, i| [ i + 1, name ] }.freeze # ids are 1-based: position in FORMS plus one
24
+
25
+ # A single conjugated form. `kanji` and `reading` hold the inflected surface
26
+ # and its kana; either may be nil (a kana-only word has no `kanji`, and the
27
+ # `reading` is only filled in when one was supplied). `onum` distinguishes
28
+ # equally-valid variants of the same form (e.g. ~なくて vs ~ないで).
29
+ Form = Struct.new(:name, :negative, :polite, :onum, :kanji, :reading, keyword_init: true) do
30
+ def negative? = negative # predicate reader for the `negative` member
31
+ def polite? = polite # predicate reader for the `polite` member
32
+
33
+ # The primary (most standard) variant for its form/polarity/formality; see
34
+ # Word#variants for the alternatives (e.g. ~なくて beside ~ないで).
35
+ def primary? = onum == 1
36
+
37
+ # Human label for this form ("Past", "Te-form", …), looked up in FORMS.
38
+ def label = FORMS[name]
39
+
40
+ # The text to show: the kanji writing if there is one, otherwise the kana
41
+ def to_s = (kanji || reading).to_s # empty string when both are nil
42
+ alias_method :text, :to_s
43
+
44
+ def inspect = "#<Daidai::Form #{name}#{" negative" if negative}#{" polite" if polite}: #{self}>"
45
+ end
46
+
47
+ # A conjugated word — the full paradigm for one dictionary-form input.
48
+ #
49
+ # Forms are reached by name, with optional negative:/polite: modifiers:
50
+ #
51
+ # word.past #=> Form (plain affirmative)
52
+ # word.past(polite: true) #=> Form
53
+ # word.non_past(negative: true) #=> Form
54
+ #
55
+ # …or through fluent views that read like grammar (and chain):
56
+ #
57
+ # word.polite.past
58
+ # word.negative.non_past
59
+ # word.polite.negative.te
60
+ #
61
+ # `word[:past, polite: true]` does the same dynamically, and a Word is
62
+ # Enumerable over all of its forms.
63
+ class Word
64
+ include Enumerable
65
+
66
+ attr_reader :word, :pos, :kind, :forms
67
+
68
+ def initialize(word:, pos:, kind:, forms:)
69
+ @word = word
70
+ @pos = pos
71
+ @kind = kind
72
+ @forms = forms
73
+ @index = forms.group_by { |f| [ f.name, f.negative, f.polite ] } # [name, negative, polite] => Array of Forms (every onum)
74
+ end
75
+
76
+ # The primary (lowest-onum) Form for `name` in the given polarity/formality, or nil.
77
+ def [](name, negative: false, polite: false)
78
+ @index[[ name, negative, polite ]]&.min_by(&:onum)
79
+ end
80
+ alias form []
81
+
82
+ # Every accepted variant (all onums) for a form, primary first; [] when there is none.
83
+ def variants(name, negative: false, polite: false)
84
+ (@index[[ name, negative, polite ]] || []).sort_by(&:onum)
85
+ end
86
+
87
+ def each(&) = @forms.each(&) # Enumerable hook: iterates over @forms
88
+
89
+ # The form names present for this word, in table order.
90
+ def conjugations
91
+ present = @forms.map(&:name).uniq
92
+ FORMS.keys.select { |name| present.include?(name) }
93
+ end
94
+
95
+ # Fluent views — a lens with polarity/formality pre-applied.
96
+ def polite = View.new(self, negative: false, polite: true)
97
+ def plain = View.new(self, negative: false, polite: false)
98
+ def negative = View.new(self, negative: true, polite: false)
99
+ def affirmative = View.new(self, negative: false, polite: false)
100
+
101
+ FORMS.each_key do |name| # one reader per form name (#non_past, #past, #te, …)
102
+ define_method(name) do |negative: false, polite: false|
103
+ self[name, negative: negative, polite: polite]
104
+ end
105
+ end
106
+
107
+ alias dictionary non_past
108
+ alias te_form te
109
+
110
+ def inspect = "#<Daidai::Word #{word} (#{pos}, #{kind}): #{@forms.size} forms>"
111
+ end
112
+
113
+ # A polarity/formality lens over a Word, returned by Word#polite etc. Calling
114
+ # a form name on it applies the accumulated modifiers; modifiers chain.
115
+ class View
116
+ def initialize(word, negative:, polite:)
117
+ @word = word
118
+ @negative = negative
119
+ @polite = polite
120
+ end
121
+
122
+ def polite = View.new(@word, negative: @negative, polite: true) # keeps the current polarity
123
+ def plain = View.new(@word, negative: @negative, polite: false) # keeps the current polarity
124
+ def negative = View.new(@word, negative: true, polite: @polite) # keeps the current formality
125
+ def affirmative = View.new(@word, negative: false, polite: @polite) # keeps the current formality
126
+
127
+ FORMS.each_key do |name| # one reader per form name, delegating to Word#[]
128
+ define_method(name) { @word[name, negative: @negative, polite: @polite] }
129
+ end
130
+
131
+ alias dictionary non_past
132
+ alias te_form te
133
+ end
134
+ end
data/lib/daidai.rb ADDED
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "daidai/version"
4
+ require_relative "daidai/conjugator"
5
+ require_relative "daidai/kabosu"
6
+ require_relative "daidai/deinflector"
7
+
8
+ # Daidai (橙) — Japanese verb and adjective conjugation in pure Ruby.
9
+ #
10
+ # The conjugation knowledge comes from the JMdictDB tables developed by Jim
11
+ # Breen's EDRDG project (via jconj); Daidai ports the table-driven algorithm and
12
+ # exposes a small, app-friendly API. Named after the bitter orange, a sibling of
13
+ # sudachi and kabosu.
14
+ #
15
+ # Daidai.conjugate("書く", "v5k", reading: "かく")
16
+ # #=> Daidai::Word
17
+ #
18
+ # Daidai.conjugatable?("n") #=> false
19
+ # Daidai.conjugatable?("v1") #=> true
20
+ module Daidai
21
+ class << self
22
+ # Conjugate a dictionary-form word.
23
+ #
24
+ # Daidai.conjugate("書く", "v5k") # kanji surface forms
25
+ # Daidai.conjugate("書く", "v5k", reading: "かく") # + the kana of each form
26
+ # Daidai.conjugate("する", "vs-i") # kana word (is its own reading)
27
+ #
28
+ # `word` is the dictionary form (kanji surface or kana). `pos` is a JMdict
29
+ # part-of-speech code ("v5k", "adj-i", …) or an array of them — the first
30
+ # conjugatable one wins. `reading` is optional: pass it only when you also
31
+ # want each form's kana (conjugation rewrites the okurigana, which is already
32
+ # in the surface, so the kanji forms need no reading). Returns a Daidai::Word,
33
+ # or nil when nothing is conjugatable.
34
+ #
35
+ # `pos` may be omitted, in which case the optional `kabosu` gem (Sudachi)
36
+ # resolves the dictionary form, POS and reading from `word` — even when `word`
37
+ # is itself inflected ("食べている" → conjugations of 食べる). This raises
38
+ # Daidai::Kabosu::MissingDependency if kabosu/a dictionary isn't installed.
39
+ def conjugate(word, pos = nil, reading: nil)
40
+ return nil if word.nil? || word.to_s.empty?
41
+
42
+ if pos.nil?
43
+ resolved = Kabosu.resolve(word) or return nil # nil when the resolver yields nothing
44
+ word, pos, reading = resolved.values_at(:word, :pos, :reading)
45
+ end
46
+
47
+ kanji = word.match?(/\p{Han}/) ? word : nil # any Han character makes `word` the kanji surface
48
+ reading ||= kanji ? nil : word # a word without Han characters serves as its own reading
49
+ Conjugator.conjugate(kanji: kanji, reading: reading, pos: pos)
50
+ end
51
+
52
+ # Whether `pos` (a code or array of codes) describes a conjugatable word.
53
+ def conjugatable?(pos)
54
+ Conjugator.conjugatable?(pos)
55
+ end
56
+
57
+ # Deinflect an inflected surface form back to its dictionary form(s), naming
58
+ # each inflection along the way (the inverse of #conjugate):
59
+ #
60
+ # # each result has a `term` and its named `inflections` (surface -> dictionary):
61
+ # Daidai.deinflect("食べてる") # includes #<Daidai::Deinflection 食べる [-いる, -て]>
62
+ # Daidai.deinflect("読まなかった") # includes #<Daidai::Deinflection 読む [-た, negative]>
63
+ #
64
+ # Returns every candidate the rules can reach (each a Daidai::Deinflection);
65
+ # the rules are string-based and dictionary-free, so many candidates are not
66
+ # real words. A caller with a dictionary looks up each `term`; one without can
67
+ # keep only `dictionary_form?` candidates. Pure and offline — unlike the
68
+ # POS-less #conjugate, it needs no kabosu.
69
+ def deinflect(text)
70
+ return [] if text.nil? || text.to_s.empty?
71
+
72
+ Deinflector.deinflect(text)
73
+ end
74
+ end
75
+ end
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: daidai
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - davafons
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-06-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '13.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ description: Daidai is a table-driven, pure-Ruby port of jconj's conjugation algorithm,
70
+ built on the JMdictDB conjugation tables. It conjugates Japanese verbs and adjectives
71
+ from a dictionary-form word and its JMdict part-of-speech code, with no native extension
72
+ and nothing to download at runtime.
73
+ email:
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - CHANGELOG.md
79
+ - LICENSE
80
+ - NOTICE
81
+ - README.md
82
+ - lib/daidai.rb
83
+ - lib/daidai/conjugator.rb
84
+ - lib/daidai/deinflector.rb
85
+ - lib/daidai/kabosu.rb
86
+ - lib/daidai/resources/conj.csv
87
+ - lib/daidai/resources/conjo.csv
88
+ - lib/daidai/resources/conotes.csv
89
+ - lib/daidai/resources/japanese-transforms.json
90
+ - lib/daidai/resources/kwpos.csv
91
+ - lib/daidai/tables.rb
92
+ - lib/daidai/version.rb
93
+ - lib/daidai/word.rb
94
+ homepage: https://github.com/davafons/daidai
95
+ licenses:
96
+ - GPL-3.0-only
97
+ metadata:
98
+ source_code_uri: https://github.com/davafons/daidai
99
+ bug_tracker_uri: https://github.com/davafons/daidai/issues
100
+ changelog_uri: https://github.com/davafons/daidai/blob/main/CHANGELOG.md
101
+ rubygems_mfa_required: 'true'
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '3.1'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubygems_version: 3.5.22
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Pure-Ruby Japanese verb and adjective conjugation
121
+ test_files: []