daidai 0.1.1.dev.20260627.f7f9ee5 → 0.2.0.dev.20260627.281e515
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/README.md +6 -0
- data/lib/daidai/deinflector.rb +45 -0
- data/lib/daidai/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 950df85fe41d6df4af5192dde5953268899223f1dd7337afc95770f9c75533f5
|
|
4
|
+
data.tar.gz: 7207139aaf8e822251f486696461b43abd9a2130e00c7352f155a45a980ecf36
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: db840bf6e68ff136bb5df9814010e2dbe3ae6f57db3093092553dfe740e182f2fa29b29b1161340a6cf3287a6eff37cf4e8b96110f872a528549b81c40e9123c
|
|
7
|
+
data.tar.gz: 53f590517637b65708def02a978ccd5cd1898f6e416e199af628e2c9ec8a2b7dfa0eb2b4b6752355a41fb29c1ff840c92300a1eb11360e59b68df5bc27b87c41
|
data/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,15 @@ follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.2.0] - 2026-06-27
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- `Daidai::Deinflection#labels` and `Daidai::Deinflector.label(name)`: friendly
|
|
14
|
+
English names for deinflection rules ("-いる" → "progressive", "-て" →
|
|
15
|
+
"te-form"). daidai now owns the inflection-naming vocabulary so consumers
|
|
16
|
+
localise it rather than each maintaining their own map.
|
|
17
|
+
|
|
9
18
|
## [0.1.1] - 2026-06-27
|
|
10
19
|
|
|
11
20
|
### Changed
|
|
@@ -35,6 +44,7 @@ follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
35
44
|
inverse of `#conjugate`). Ported from Yomitan's Japanese language transforms;
|
|
36
45
|
also covers colloquial contractions (てる, ちゃう, …). See `Daidai::Deinflector`.
|
|
37
46
|
|
|
38
|
-
[Unreleased]: https://github.com/davafons/daidai/compare/v0.1.1...HEAD
|
|
47
|
+
[Unreleased]: https://github.com/davafons/daidai/compare/v0.2.0...HEAD
|
|
48
|
+
[0.2.0]: https://github.com/davafons/daidai/compare/v0.1.1...v0.2.0
|
|
39
49
|
[0.1.1]: https://github.com/davafons/daidai/compare/v0.1.0...v0.1.1
|
|
40
50
|
[0.1.0]: https://github.com/davafons/daidai/releases/tag/v0.1.0
|
data/README.md
CHANGED
|
@@ -189,10 +189,16 @@ Each result is a `Daidai::Deinflection`:
|
|
|
189
189
|
d = Daidai.deinflect("食べてる").find { |x| x.term == "食べる" }
|
|
190
190
|
d.term # => "食べる" (the candidate dictionary form)
|
|
191
191
|
d.inflections # => ["-いる", "-て"] (rule names, surface to dictionary)
|
|
192
|
+
d.labels # => ["progressive", "te-form"] (friendly English names)
|
|
192
193
|
d.dictionary_form? # => true (chain lands on a known dictionary form)
|
|
193
194
|
d.to_s # => "食べる [-いる, -て]"
|
|
194
195
|
```
|
|
195
196
|
|
|
197
|
+
`inflections` are the stable rule ids (ported from Yomitan, terse by design);
|
|
198
|
+
`labels` names the grammar for display. `Daidai::Deinflector.label("-いる")` does
|
|
199
|
+
the single lookup (falling back to the id for anything uncurated). Localise the
|
|
200
|
+
labels downstream if your app is multilingual.
|
|
201
|
+
|
|
196
202
|
Deinflection is rule-based and **dictionary-free**, so it returns *every* base
|
|
197
203
|
form the rules can reach, many of which are not real words (食べてる also yields
|
|
198
204
|
食べつ as a hypothetical potential). It is meant to feed a dictionary lookup: keep
|
data/lib/daidai/deinflector.rb
CHANGED
|
@@ -14,6 +14,11 @@ module Daidai
|
|
|
14
14
|
Deinflection = Struct.new(:term, :inflections, :dictionary_form, keyword_init: true) do
|
|
15
15
|
# Predicate-style reader for the :dictionary_form member; the stored value is returned as-is (no coercion to true/false).
def dictionary_form? = dictionary_form
|
|
16
16
|
|
|
17
|
+
# The inflections as friendly English labels for display (e.g. "-いる" =>
|
|
18
|
+
# "progressive", "-て" => "te-form"), via Deinflector.label. Localise these
|
|
19
|
+
# downstream (i18n) if your app is multilingual.
|
|
20
|
+
# Display labels for the rule names in #inflections, one per entry and in the same order, each resolved through Deinflector.label.
def labels = inflections.map { Deinflector.label(_1) }
|
|
21
|
+
|
|
17
22
|
# Human-readable form: the bare term when no inflections were applied,
# otherwise the term followed by the comma-separated inflections in brackets.
def to_s
  return term if inflections.empty?

  "#{term} [#{inflections.join(", ")}]"
end
|
|
18
23
|
|
|
19
24
|
# Debug representation: the #to_s rendering (via string interpolation of self) wrapped in "#<Daidai::Deinflection ...>".
def inspect = "#<Daidai::Deinflection #{self}>"
|
|
@@ -34,6 +39,39 @@ module Daidai
|
|
|
34
39
|
module Deinflector
|
|
35
40
|
DATA_FILE = File.expand_path("resources/japanese-transforms.json", __dir__)
|
|
36
41
|
|
|
42
|
+
# Friendly English labels for the deinflection rule names #deinflect emits.
|
|
43
|
+
# The underlying names (ported from Yomitan) are terse and sometimes symbolic
|
|
44
|
+
# ("-いる", "-て", "-ます"); these name the grammar instead ("progressive",
|
|
45
|
+
# "te-form", "polite"). This is daidai's curation, not Yomitan data — it is
|
|
46
|
+
# the single source of truth for naming an inflection, so consumers localise
|
|
47
|
+
# these rather than maintain their own map. Keyed by the rule name; see
|
|
48
|
+
# Deinflector.label for the lookup (which falls back to the name itself).
|
|
49
|
+
LABELS = {
|
|
50
|
+
"-いる" => "progressive", "-て" => "te-form", "-た" => "past",
|
|
51
|
+
"-ます" => "polite", "negative" => "negative", "passive" => "passive",
|
|
52
|
+
"potential" => "potential", "potential or passive" => "potential / passive",
|
|
53
|
+
"causative" => "causative", "short causative" => "short causative",
|
|
54
|
+
"volitional" => "volitional", "volitional slang" => "volitional (slang)",
|
|
55
|
+
"imperative" => "imperative", "continuative" => "continuative",
|
|
56
|
+
"-たい" => "desiderative (-tai)", "-たら" => "conditional (-tara)",
|
|
57
|
+
"-たり" => "representative (-tari)", "-ば" => "provisional (-ba)",
|
|
58
|
+
"-ゃ" => "conditional contraction (-ya)", "-ちゃ" => "contracted (-cha)",
|
|
59
|
+
"-ちゃう" => "completive (-chau)", "-ちまう" => "completive (-chimau)",
|
|
60
|
+
"-しまう" => "completive (-shimau)", "-おく" => "preparatory (-oku)",
|
|
61
|
+
"-そう" => "looks like (-sou)", "-すぎる" => "excessive (-sugiru)",
|
|
62
|
+
"-過ぎる" => "excessive (-sugiru)", "-なさい" => "polite imperative (-nasai)",
|
|
63
|
+
"-さ" => "nominalization (-sa)", "-げ" => "appearance (-ge)",
|
|
64
|
+
"-がる" => "showing signs (-garu)", "-やがる" => "contemptuous (-yagaru)",
|
|
65
|
+
"-ず" => "negative (-zu)", "-ぬ" => "negative (-nu)", "-ん" => "negative (-n)",
|
|
66
|
+
"-ざる" => "negative (-zaru)", "-ねば" => "negative conditional (-neba)",
|
|
67
|
+
"-まい" => "negative volitional (-mai)", "-く" => "adverbial (-ku)",
|
|
68
|
+
"-き" => "attributive (-ki)", "-む" => "archaic volitional (-mu)",
|
|
69
|
+
"-んばかり" => "on the verge (-nbakari)", "-んとする" => "intentive (-ntosuru)",
|
|
70
|
+
"-え" => "slang (-e)", "n-slang" => "n-slang",
|
|
71
|
+
"imperative negative slang" => "imperative negative (slang)",
|
|
72
|
+
"kansai-ben" => "kansai dialect"
|
|
73
|
+
}.freeze
|
|
74
|
+
|
|
37
75
|
# One deinflection rule: a test for the inflected form and how to undo it.
|
|
38
76
|
Rule = Struct.new(:is_inflected, :deinflect, :conditions_in, :conditions_out, keyword_init: true)
|
|
39
77
|
|
|
@@ -55,6 +93,13 @@ module Daidai
|
|
|
55
93
|
.uniq { |d| [ d.term, d.inflections ] }
|
|
56
94
|
end
|
|
57
95
|
|
|
96
|
+
# Friendly English label for a deinflection rule name (the strings in a
|
|
97
|
+
# Deinflection's #inflections), e.g. "-いる" => "progressive". Falls back to
|
|
98
|
+
# the name itself for anything not in LABELS, so it is always safe to call.
|
|
99
|
+
def label(name)
|
|
100
|
+
# LABELS is keyed by String, so the name is normalised with to_s before the lookup; the same string is the fallback when no label is curated.
LABELS.fetch(name.to_s, name.to_s)
|
|
101
|
+
end
|
|
102
|
+
|
|
58
103
|
# The raw transformer output (a TransformedText per reachable form, including
|
|
59
104
|
# the identity). Mirrors Yomitan's LanguageTransformer#transform.
|
|
60
105
|
def transform(source_text)
|
data/lib/daidai/version.rb
CHANGED