kabosu 0.6.10.1-x86_64-linux → 0.6.10.2-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kabosu/3.1/kabosu.so +0 -0
- data/lib/kabosu/3.2/kabosu.so +0 -0
- data/lib/kabosu/3.3/kabosu.so +0 -0
- data/lib/kabosu/3.4/kabosu.so +0 -0
- data/lib/kabosu/4.0/kabosu.so +0 -0
- data/lib/kabosu/morpheme_list.rb +48 -0
- data/lib/kabosu/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4a6abb566fd7057aaa0d80aede0b82b381bcb7fbffca7674bb9770730ed3c418
|
|
4
|
+
data.tar.gz: 9faa8f8680255f365b54a08d74eb468a385fc7fa196383c1ee61049e0b2463af
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 246853534f90ae364b6db3eacdd0da3ccfe26d275ba5032ac8b1ec5546e404d6b2eb62594aa018ce5c770450d404a3690773861e8649bb73e217d4dd6018859a
|
|
7
|
+
data.tar.gz: d64e5c3e15de731137ef790ad23b3eb34e77687e6f9b3d04c1d3a22c8c296bd65064c34ee6100259d40d8b8beb12caf4eca6ccd0932ea5714a1a4fdbd1b8520d
|
data/lib/kabosu/3.1/kabosu.so
CHANGED
|
Binary file
|
data/lib/kabosu/3.2/kabosu.so
CHANGED
|
Binary file
|
data/lib/kabosu/3.3/kabosu.so
CHANGED
|
Binary file
|
data/lib/kabosu/3.4/kabosu.so
CHANGED
|
Binary file
|
data/lib/kabosu/4.0/kabosu.so
CHANGED
|
Binary file
|
data/lib/kabosu/morpheme_list.rb
CHANGED
|
@@ -92,6 +92,54 @@ module Kabosu
|
|
|
92
92
|
surfaces.join
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
# Groups the morphemes of this list into jpdb-style units: each group is an
# array that starts with one morpheme and is followed by the morphemes that
# attach to it (as decided by +extends_group?+).
#
# When the backing +@source+ responds to +group_morphemes+ itself (a lazy
# source whose grouping is performed natively in Rust), the call is delegated
# and the source's result is returned unchanged. Otherwise a pure-Ruby
# fallback walks the list with +each+, so the method is always safe to call
# on already-materialized lists.
#
# @return [Array<Array>] groups of morphemes in their original order
#   (fallback path); the delegated path returns whatever the source returns.
def group_morphemes
  # nil answers respond_to? with false, so no safe navigation is needed here.
  return @source.group_morphemes if @source.respond_to?(:group_morphemes)

  groups = []
  each do |morpheme|
    current = groups.last
    # A group can only grow when it was opened by a content word and the
    # incoming morpheme attaches to the group's most recent member.
    if current && content_word?(current.first) && extends_group?(morpheme, current.last)
      current << morpheme
    else
      groups << [morpheme]
    end
  end
  groups
end
|
|
114
|
+
|
|
115
|
+
private
|
|
116
|
+
|
|
117
|
+
# Leading part-of-speech tags that never open a group on their own
# (presumably particle, auxiliary verb, supplementary symbol, symbol and
# whitespace in the dictionary's tag set). Frozen and shared so the list is
# not rebuilt on every call.
NON_CONTENT_POS = %w[助詞 助動詞 補助記号 記号 空白].freeze

# Reports whether +morpheme+ counts as a content word, i.e. its first
# part-of-speech tag is not one of NON_CONTENT_POS.
#
# @param morpheme [#part_of_speech] object whose +part_of_speech+ returns an
#   array-like of tags
# @return [Boolean]
def content_word?(morpheme)
  !NON_CONTENT_POS.include?(morpheme.part_of_speech.first)
end
|
|
120
|
+
|
|
121
|
+
# Leading POS tags of the previous morpheme after which an adverbial
# particle (副助詞) is allowed to join the group.
GROUP_INFLECTING_POS = %w[動詞 形容詞 形状詞].freeze

# Surfaces of the particle that lets a following dependent verb
# (動詞 / 非自立可能) join the group.
GROUP_TE_PARTICLES = %w[て で].freeze

# Decides whether +morpheme+ attaches to the group whose most recent member
# is +prev+.
#
# A morpheme extends the group when it is:
# * an auxiliary verb (助動詞); or
# * a particle (助詞) that is not a clause boundary and is either a
#   conjunctive particle (接続助詞) or an adverbial particle (副助詞) that
#   follows a morpheme tagged as one of GROUP_INFLECTING_POS; or
# * a verb tagged 非自立可能 that directly follows a particle whose surface
#   is one of GROUP_TE_PARTICLES.
#
# @param morpheme [#part_of_speech] candidate morpheme
# @param prev [#part_of_speech, #surface, nil] last morpheme of the current
#   group, if any
# @return [Boolean]
def extends_group?(morpheme, prev = nil)
  pos = morpheme.part_of_speech

  case pos[0]
  when "助動詞"
    true
  when "助詞"
    return false if clause_boundary?(morpheme)

    case pos[1]
    when "接続助詞" then true
    when "副助詞" then prev ? GROUP_INFLECTING_POS.include?(prev.part_of_speech[0]) : false
    else false
    end
  when "動詞"
    return false unless pos[1] == "非自立可能" && prev

    prev.part_of_speech[0] == "助詞" && GROUP_TE_PARTICLES.include?(prev.surface)
  else
    false
  end
end
|
|
131
|
+
|
|
132
|
+
# Particle surfaces that always mark a clause boundary, regardless of the
# particle's sub-category. Frozen and shared so the list is not rebuilt on
# every call.
CLAUSE_BOUNDARY_PARTICLES =
  %w[ながら たら ば と のに から ので けれど けど つつ なり や か かどうか とも].freeze

# Reports whether +morpheme+ is a particle (助詞) that ends a clause: either
# its surface is listed in CLAUSE_BOUNDARY_PARTICLES, or it is the
# conjunctive particle (接続助詞) "が".
#
# @param morpheme [#part_of_speech, #surface, nil] morpheme to inspect;
#   +nil+ is accepted and is never a boundary
# @return [Boolean]
def clause_boundary?(morpheme)
  return false unless morpheme

  pos = morpheme.part_of_speech
  # Only particles can be boundaries; everything below assumes one.
  return false unless pos[0] == "助詞"

  surface = morpheme.surface
  CLAUSE_BOUNDARY_PARTICLES.include?(surface) || (pos[1] == "接続助詞" && surface == "が")
end
|
|
140
|
+
|
|
141
|
+
public
|
|
142
|
+
|
|
95
143
|
# Filter morphemes by POS. Accepts a PosMatcher or an array pattern.
|
|
96
144
|
# Returns a new MorphemeList with only matching morphemes.
|
|
97
145
|
#
|
data/lib/kabosu/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kabosu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.10.1
|
|
4
|
+
version: 0.6.10.2
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- davafons
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-05-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|