kabosu 0.6.10.2 → 0.6.11.0.dev.20260627.a5a69e7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3a2ef2f715270a804bd08e6189bb9563999b2a4a1769a776332e2c3f1e1e2e5
4
- data.tar.gz: fa3b664bfb5e6f82ba576c79a4a58fec2b8f062b2675c6092336c2cbac587350
3
+ metadata.gz: '09c9e8c9a75657b66be036aa6b88ed8f3d9e540551d01612fb73c0f73e3fb45c'
4
+ data.tar.gz: 104e52c5170dd5c8d5a7f03e88b6eb4185647b1804019733093e09b976af9b75
5
5
  SHA512:
6
- metadata.gz: 33fea126465110bb21be9355ae3f014154319837b6491dc0b83ad6121b14f6723f6d5051faeffe7420d752dc7f08a073ca4eec5ca5c0e2fd6783bda0e6412a6d
7
- data.tar.gz: c6dd393d79546dcf005c0c8262ecf14b4ca037e33e3c841bb8f104af05199d0fa1bb63578a71501c3c19d72916bb4b03c5308bde0ec9759b7fdbde61339fe879
6
+ metadata.gz: a5bc855d6659efe5450e43bf70c6d24438fe20fe333f7bcd08c0ea6983221c6b522952e7e5a65150051c191b8a2791f1d2f7ceb468ab80faa58e6844ff804dda
7
+ data.tar.gz: 7e86601193b2207986fd0fc43f875d0ff8f252f6264a1968f282bca643061a0307c97dff63a95dfae4a7035770fb4da288ab40b00a38e9957d91bc99cf7dba0a
data/Cargo.lock CHANGED
@@ -159,7 +159,7 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
159
159
 
160
160
  [[package]]
161
161
  name = "kabosu"
162
- version = "0.6.12"
162
+ version = "0.6.11"
163
163
  dependencies = [
164
164
  "magnus",
165
165
  "sudachi",
@@ -393,8 +393,8 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
393
393
 
394
394
  [[package]]
395
395
  name = "sudachi"
396
- version = "0.6.10"
397
- source = "git+https://github.com/WorksApplications/sudachi.rs?tag=v0.6.10#7e2f287bbfffc036421cf960802e41a696727747"
396
+ version = "0.6.11"
397
+ source = "git+https://github.com/WorksApplications/sudachi.rs?tag=v0.6.11#90fd6068c80c2fc3b63e0dbab0e341475bad4d8f"
398
398
  dependencies = [
399
399
  "aho-corasick",
400
400
  "bitflags",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kabosu"
3
- version = "0.6.10"
3
+ version = "0.6.11"
4
4
  edition = "2021"
5
5
  publish = false
6
6
 
@@ -9,4 +9,4 @@ crate-type = ["cdylib"]
9
9
 
10
10
  [dependencies]
11
11
  magnus = { version = "0.8", features = ["rb-sys"] }
12
- sudachi = { git = "https://github.com/WorksApplications/sudachi.rs", tag = "v0.6.10" }
12
+ sudachi = { git = "https://github.com/WorksApplications/sudachi.rs", tag = "v0.6.11" }
@@ -39,17 +39,16 @@ pub(crate) fn group_morphemes_rust(
39
39
  // POS helpers
40
40
 
41
41
  fn is_content_word(pos_id: u16, dict: &JapaneseDictionary) -> bool {
42
- match dict.grammar().pos_components(pos_id).first().map(|s| s.as_str()) {
43
- Some("助詞") | Some("助動詞") | Some("補助記号") | Some("記号") | Some("空白") => false,
44
- _ => true,
45
- }
42
+ !matches!(
43
+ dict.grammar()
44
+ .pos_components(pos_id)
45
+ .first()
46
+ .map(|s| s.as_str()),
47
+ Some("助詞") | Some("助動詞") | Some("補助記号") | Some("記号") | Some("空白")
48
+ )
46
49
  }
47
50
 
48
- fn extends_group(
49
- m: &MorphemeData,
50
- prev: &MorphemeData,
51
- dict: &JapaneseDictionary,
52
- ) -> bool {
51
+ fn extends_group(m: &MorphemeData, prev: &MorphemeData, dict: &JapaneseDictionary) -> bool {
53
52
  let comps = dict.grammar().pos_components(m.pos_id);
54
53
  let pos0 = comps.first().map(|s| s.as_str());
55
54
  let pos1 = comps.get(1).map(|s| s.as_str());
@@ -74,10 +73,12 @@ fn extends_group(
74
73
  return false;
75
74
  }
76
75
  // te-form auxiliary chain: て/で + いる/ある/くる/etc.
77
- let prev_pos0 = dict.grammar().pos_components(prev.pos_id).first().map(|s| s.as_str());
78
- if prev_pos0 == Some("助詞")
79
- && (prev.surface == "て" || prev.surface == "で")
80
- {
76
+ let prev_pos0 = dict
77
+ .grammar()
78
+ .pos_components(prev.pos_id)
79
+ .first()
80
+ .map(|s| s.as_str());
81
+ if prev_pos0 == Some("助詞") && (prev.surface == "て" || prev.surface == "で") {
81
82
  return true;
82
83
  }
83
84
  // compound verb (V+V) intentionally skipped — caller handles DB lookup
@@ -127,7 +128,10 @@ fn is_clause_boundary_particle(surface: &str) -> bool {
127
128
 
128
129
  fn is_verb_adj_adv(pos_id: u16, dict: &JapaneseDictionary) -> bool {
129
130
  matches!(
130
- dict.grammar().pos_components(pos_id).first().map(|s| s.as_str()),
131
+ dict.grammar()
132
+ .pos_components(pos_id)
133
+ .first()
134
+ .map(|s| s.as_str()),
131
135
  Some("動詞") | Some("形容詞") | Some("形状詞")
132
136
  )
133
137
  }
@@ -1,3 +1,8 @@
1
+ // sudachi::error::SudachiError is a large external enum; boxing it to satisfy
2
+ // clippy::result_large_err would churn Result signatures throughout the crate
3
+ // for no real benefit on these cold error paths.
4
+ #![allow(clippy::result_large_err)]
5
+
1
6
  mod dictionary;
2
7
  mod errors;
3
8
  mod grouping;
@@ -254,7 +254,10 @@ impl RbMorpheme {
254
254
  let pos_id = self.data.pos_id;
255
255
 
256
256
  {
257
- let cache = POS_CACHE.get_or_init(|| Mutex::new(HashMap::new())).lock().unwrap();
257
+ let cache = POS_CACHE
258
+ .get_or_init(|| Mutex::new(HashMap::new()))
259
+ .lock()
260
+ .unwrap();
258
261
  if let Some(&cached) = cache.get(&(dict_ptr, pos_id)) {
259
262
  return Ok(cached.0);
260
263
  }
@@ -272,7 +275,11 @@ impl RbMorpheme {
272
275
  // arrays are tiny. The cache lives for the process lifetime.
273
276
  gc::register_mark_object(ary);
274
277
 
275
- POS_CACHE.get_or_init(|| Mutex::new(HashMap::new())).lock().unwrap().insert((dict_ptr, pos_id), CachedRArray(ary));
278
+ POS_CACHE
279
+ .get_or_init(|| Mutex::new(HashMap::new()))
280
+ .lock()
281
+ .unwrap()
282
+ .insert((dict_ptr, pos_id), CachedRArray(ary));
276
283
 
277
284
  Ok(ary)
278
285
  }
@@ -8,8 +8,8 @@ module Kabosu
8
8
  class DictManager
9
9
  EDITIONS = %w[small core full].freeze
10
10
  EDITION_PRIORITY = %w[full core small].freeze
11
- GITHUB_REPO = "WorksApplications/SudachiDict"
12
- GITHUB_API = "https://api.github.com"
11
+ GITHUB_REPO = "WorksApplications/SudachiDict".freeze
12
+ GITHUB_API = "https://api.github.com".freeze
13
13
 
14
14
  class DictNotFound < StandardError; end
15
15
  class DownloadError < StandardError; end
@@ -42,7 +42,7 @@ module Kabosu
42
42
  dic_path = File.join(dest_dir, "system_#{edition}.dic")
43
43
 
44
44
  if File.exist?(dic_path)
45
- $stderr.puts "Already installed: #{dic_path}"
45
+ warn "Already installed: #{dic_path}"
46
46
  return dic_path
47
47
  end
48
48
 
@@ -54,11 +54,9 @@ module Kabosu
54
54
  extract(zip_path, @dir)
55
55
  FileUtils.rm_f(zip_path)
56
56
 
57
- unless File.exist?(dic_path)
58
- raise DownloadError, "Expected #{dic_path} after extraction, but file not found"
59
- end
57
+ raise DownloadError, "Expected #{dic_path} after extraction, but file not found" unless File.exist?(dic_path)
60
58
 
61
- $stderr.puts "Installed: #{dic_path}"
59
+ warn "Installed: #{dic_path}"
62
60
  dic_path
63
61
  end
64
62
 
@@ -88,7 +86,7 @@ module Kabosu
88
86
  results = []
89
87
  return results unless Dir.exist?(@dir)
90
88
 
91
- Dir.glob(File.join(@dir, "sudachi-dictionary-*")).sort.reverse.each do |version_dir|
89
+ Dir.glob(File.join(@dir, "sudachi-dictionary-*")).reverse.each do |version_dir|
92
90
  next unless File.directory?(version_dir)
93
91
 
94
92
  version = File.basename(version_dir).sub("sudachi-dictionary-", "")
@@ -113,6 +111,7 @@ module Kabosu
113
111
  edition = validate_edition(edition)
114
112
  match = candidates.find { |d| d[:edition] == edition }
115
113
  raise DictNotFound, "No #{edition} dictionary installed" unless match
114
+
116
115
  return match[:path]
117
116
  end
118
117
 
@@ -140,14 +139,14 @@ module Kabosu
140
139
 
141
140
  targets.each do |d|
142
141
  FileUtils.rm_f(d[:path])
143
- $stderr.puts "Removed: #{d[:path]}"
142
+ warn "Removed: #{d[:path]}"
144
143
 
145
144
  # Clean up empty version directories
146
145
  version_dir = File.dirname(d[:path])
147
146
  dics_remaining = Dir.glob(File.join(version_dir, "system_*.dic"))
148
147
  if dics_remaining.empty?
149
148
  FileUtils.rm_rf(version_dir)
150
- $stderr.puts "Removed empty directory: #{version_dir}"
149
+ warn "Removed empty directory: #{version_dir}"
151
150
  end
152
151
  end
153
152
  end
@@ -178,6 +177,7 @@ module Kabosu
178
177
  unless EDITIONS.include?(edition)
179
178
  raise ArgumentError, "Unknown edition '#{edition}'. Must be one of: #{EDITIONS.join(", ")}"
180
179
  end
180
+
181
181
  edition
182
182
  end
183
183
 
@@ -186,7 +186,7 @@ module Kabosu
186
186
  end
187
187
 
188
188
  def download(url, dest)
189
- $stderr.puts "Downloading #{url}..."
189
+ warn "Downloading #{url}..."
190
190
  uri = resolve_redirects(URI(url))
191
191
 
192
192
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
@@ -202,20 +202,22 @@ module Kabosu
202
202
  response.read_body do |chunk|
203
203
  f.write(chunk)
204
204
  written += chunk.bytesize
205
- if total && total > 0
205
+ if total&.positive?
206
206
  pct = (written * 100 / total).clamp(0, 100)
207
- $stderr.print "\r #{(written.to_f / 1024 / 1024).round(1)} / #{(total.to_f / 1024 / 1024).round(1)} MB (#{pct}%)"
207
+ done_mb = (written.to_f / 1024 / 1024).round(1)
208
+ total_mb = (total.to_f / 1024 / 1024).round(1)
209
+ $stderr.print "\r #{done_mb} / #{total_mb} MB (#{pct}%)"
208
210
  end
209
211
  end
210
212
  end
211
213
 
212
- $stderr.puts "\r #{(written.to_f / 1024 / 1024).round(1)} MB downloaded"
214
+ warn "\r #{(written.to_f / 1024 / 1024).round(1)} MB downloaded"
213
215
  end
214
216
  end
215
217
  end
216
218
 
217
219
  def resolve_redirects(uri, limit: 5)
218
- raise DownloadError, "Too many redirects" if limit == 0
220
+ raise DownloadError, "Too many redirects" if limit.zero?
219
221
 
220
222
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
221
223
  response = http.request(Net::HTTP::Head.new(uri))
@@ -229,7 +231,7 @@ module Kabosu
229
231
  end
230
232
 
231
233
  def http_get(uri, headers: {}, redirect_limit: 5)
232
- raise DownloadError, "Too many redirects" if redirect_limit == 0
234
+ raise DownloadError, "Too many redirects" if redirect_limit.zero?
233
235
 
234
236
  http = Net::HTTP.new(uri.host, uri.port)
235
237
  http.use_ssl = (uri.scheme == "https")
@@ -248,7 +250,7 @@ module Kabosu
248
250
  end
249
251
 
250
252
  def extract(zip_path, dest_dir)
251
- $stderr.puts "Extracting..."
253
+ warn "Extracting..."
252
254
  Zip::File.open(zip_path) do |archive|
253
255
  archive.each do |entry|
254
256
  target = File.join(dest_dir, entry.name)
@@ -256,6 +258,7 @@ module Kabosu
256
258
  unless File.expand_path(target).start_with?(File.expand_path(dest_dir) + File::SEPARATOR)
257
259
  raise DownloadError, "Refusing to extract entry outside dest_dir: #{entry.name}"
258
260
  end
261
+
259
262
  FileUtils.mkdir_p(File.dirname(target))
260
263
  entry.extract(target) { true } # overwrite existing
261
264
  end
@@ -7,7 +7,7 @@ module Kabosu
7
7
  def initialize(source_or_morphemes, internal_cost: nil)
8
8
  @source = source_or_morphemes if lazy_source?(source_or_morphemes)
9
9
  @morphemes = @source ? Array.new(@source.size) : source_or_morphemes
10
- @internal_cost = internal_cost || (@source&.internal_cost)
10
+ @internal_cost = internal_cost || @source&.internal_cost
11
11
  end
12
12
 
13
13
  def each(&block)
@@ -62,7 +62,7 @@ module Kabosu
62
62
  end
63
63
 
64
64
  def surfaces
65
- return @source.surfaces if @source&.respond_to?(:surfaces)
65
+ return @source.surfaces if @source.respond_to?(:surfaces)
66
66
 
67
67
  map(&:surface)
68
68
  end
@@ -96,9 +96,7 @@ module Kabosu
96
96
  # source. Falls back to a Ruby implementation for already-materialized
97
97
  # lists so the method is always safe to call.
98
98
  def group_morphemes
99
- if @source&.respond_to?(:group_morphemes)
100
- return @source.group_morphemes
101
- end
99
+ return @source.group_morphemes if @source.respond_to?(:group_morphemes)
102
100
 
103
101
  groups = []
104
102
  each do |m|
@@ -131,10 +129,12 @@ module Kabosu
131
129
 
132
130
  def clause_boundary?(morpheme)
133
131
  return false unless morpheme
132
+
134
133
  pos = morpheme.part_of_speech
135
134
  return true if pos[0] == "助詞" &&
136
135
  %w[ながら たら ば と のに から ので けれど けど つつ なり や か かどうか とも].include?(morpheme.surface)
137
136
  return true if pos[0] == "助詞" && pos[1] == "接続助詞" && morpheme.surface == "が"
137
+
138
138
  false
139
139
  end
140
140
 
@@ -48,13 +48,15 @@ module Kabosu
48
48
 
49
49
  # Union: matches if either matcher matches.
50
50
  def |(other)
51
- a, b = self, other
51
+ a = self
52
+ b = other
52
53
  PosMatcher.new { |pos| a.match?(pos) || b.match?(pos) }
53
54
  end
54
55
 
55
56
  # Intersection: matches if both matchers match.
56
57
  def &(other)
57
- a, b = self, other
58
+ a = self
59
+ b = other
58
60
  PosMatcher.new { |pos| a.match?(pos) && b.match?(pos) }
59
61
  end
60
62
 
@@ -64,7 +66,8 @@ module Kabosu
64
66
  end
65
67
 
66
68
  def difference(other)
67
- a, b = self, other
69
+ a = self
70
+ b = other
68
71
  PosMatcher.new { |pos| a.match?(pos) && !b.match?(pos) }
69
72
  end
70
73
 
@@ -95,7 +98,7 @@ module Kabosu
95
98
  end
96
99
 
97
100
  def self.proper_nouns
98
- @proper_nouns ||= new(["名詞", "固有名詞"])
101
+ @proper_nouns ||= new(%w[名詞 固有名詞])
99
102
  end
100
103
 
101
104
  private
@@ -5,53 +5,54 @@ namespace :kabosu do
5
5
 
6
6
  desc "Install the core dictionary (default). VERSION=YYYYMMDD to pin a specific release."
7
7
  task :install do
8
- Kabosu::DictManager.new.install("core", version: ENV["VERSION"])
8
+ Kabosu::DictManager.new.install("core", version: ENV.fetch("VERSION", nil))
9
9
  end
10
10
 
11
11
  namespace :install do
12
12
  desc "Install the small dictionary. VERSION=YYYYMMDD to pin a specific release."
13
13
  task :small do
14
- Kabosu::DictManager.new.install("small", version: ENV["VERSION"])
14
+ Kabosu::DictManager.new.install("small", version: ENV.fetch("VERSION", nil))
15
15
  end
16
16
 
17
17
  desc "Install the core dictionary. VERSION=YYYYMMDD to pin a specific release."
18
18
  task :core do
19
- Kabosu::DictManager.new.install("core", version: ENV["VERSION"])
19
+ Kabosu::DictManager.new.install("core", version: ENV.fetch("VERSION", nil))
20
20
  end
21
21
 
22
22
  desc "Install the full dictionary. VERSION=YYYYMMDD to pin a specific release."
23
23
  task :full do
24
- Kabosu::DictManager.new.install("full", version: ENV["VERSION"])
24
+ Kabosu::DictManager.new.install("full", version: ENV.fetch("VERSION", nil))
25
25
  end
26
26
  end
27
27
 
28
- desc "Install a dictionary only if a matching one isn't already on disk. EDITION=core|small|full (default core), VERSION=YYYYMMDD optional."
28
+ desc "Install a dictionary only if a matching one isn't already on disk. " \
29
+ "EDITION=core|small|full (default core), VERSION=YYYYMMDD optional."
29
30
  task :install_if_missing do
30
31
  edition = ENV["EDITION"] || "core"
31
- Kabosu::DictManager.new.install_if_missing(edition, version: ENV["VERSION"])
32
+ Kabosu::DictManager.new.install_if_missing(edition, version: ENV.fetch("VERSION", nil))
32
33
  end
33
34
 
34
35
  # ── Remove ──
35
36
 
36
37
  desc "Remove all installed dictionaries, or a specific one with EDITION=small|core|full and/or VERSION=YYYYMMDD"
37
38
  task :remove do
38
- Kabosu::DictManager.new.remove(edition: ENV["EDITION"], version: ENV["VERSION"])
39
+ Kabosu::DictManager.new.remove(edition: ENV.fetch("EDITION", nil), version: ENV.fetch("VERSION", nil))
39
40
  end
40
41
 
41
42
  namespace :remove do
42
43
  desc "Remove the small dictionary."
43
44
  task :small do
44
- Kabosu::DictManager.new.remove(edition: "small", version: ENV["VERSION"])
45
+ Kabosu::DictManager.new.remove(edition: "small", version: ENV.fetch("VERSION", nil))
45
46
  end
46
47
 
47
48
  desc "Remove the core dictionary."
48
49
  task :core do
49
- Kabosu::DictManager.new.remove(edition: "core", version: ENV["VERSION"])
50
+ Kabosu::DictManager.new.remove(edition: "core", version: ENV.fetch("VERSION", nil))
50
51
  end
51
52
 
52
53
  desc "Remove the full dictionary."
53
54
  task :full do
54
- Kabosu::DictManager.new.remove(edition: "full", version: ENV["VERSION"])
55
+ Kabosu::DictManager.new.remove(edition: "full", version: ENV.fetch("VERSION", nil))
55
56
  end
56
57
  end
57
58
 
@@ -82,11 +83,9 @@ namespace :kabosu do
82
83
 
83
84
  desc "Show the path to the best available dictionary. EDITION=small|core|full to be specific."
84
85
  task :path do
85
- begin
86
- puts Kabosu::DictManager.new.find(edition: ENV["EDITION"])
87
- rescue Kabosu::DictManager::DictNotFound => e
88
- $stderr.puts e.message
89
- exit 1
90
- end
86
+ puts Kabosu::DictManager.new.find(edition: ENV.fetch("EDITION", nil))
87
+ rescue Kabosu::DictManager::DictNotFound => e
88
+ warn e.message
89
+ exit 1
91
90
  end
92
91
  end
@@ -1,3 +1,3 @@
1
1
  module Kabosu
2
- VERSION = "0.6.10.2"
2
+ VERSION = "0.6.11.0.dev.20260627.a5a69e7".freeze
3
3
  end
data/lib/kabosu.rb CHANGED
@@ -66,25 +66,17 @@ module Kabosu
66
66
  DEFAULT_CONFIG_PATH = File.expand_path("kabosu/resources/sudachi.json", __dir__).freeze
67
67
 
68
68
  class << self
69
- alias_method :_new, :new
69
+ alias _new new
70
70
 
71
71
  def new(config: nil, system_dict: nil, user_dicts: nil)
72
- unless config.nil? || config.is_a?(String)
73
- raise ArgumentError, "config must be a String or nil"
74
- end
75
- unless system_dict.nil? || system_dict.is_a?(String)
76
- raise ArgumentError, "system_dict must be a String or nil"
77
- end
72
+ raise ArgumentError, "config must be a String or nil" unless config.nil? || config.is_a?(String)
73
+ raise ArgumentError, "system_dict must be a String or nil" unless system_dict.nil? || system_dict.is_a?(String)
78
74
  unless user_dicts.nil? || user_dicts.is_a?(Array)
79
75
  raise ArgumentError, "user_dicts must be an Array<String> or nil"
80
76
  end
81
- if user_dicts&.any? { !_1.is_a?(String) }
82
- raise ArgumentError, "user_dicts must contain only String values"
83
- end
77
+ raise ArgumentError, "user_dicts must contain only String values" if user_dicts&.any? { !_1.is_a?(String) }
84
78
 
85
- if config.nil? && system_dict.nil?
86
- raise ArgumentError, "either config or system_dict is required"
87
- end
79
+ raise ArgumentError, "either config or system_dict is required" if config.nil? && system_dict.nil?
88
80
 
89
81
  # Default to the sudachi.json bundled with this gem when only
90
82
  # system_dict is given. sudachi.rs's own default config path is
@@ -113,9 +105,7 @@ module Kabosu
113
105
 
114
106
  def map_dictionary_init_error(error, config:, system_dict:)
115
107
  message = error.message
116
- if config && system_dict.nil?
117
- ConfigError.new(message)
118
- elsif message.match?(/config|setting\.json|json/i)
108
+ if (config && system_dict.nil?) || message.match?(/config|setting\.json|json/i)
119
109
  ConfigError.new(message)
120
110
  else
121
111
  DictionaryError.new(message)
@@ -127,40 +117,33 @@ module Kabosu
127
117
  end
128
118
  end
129
119
 
130
- alias_method :_create, :create
131
- alias_method :_lookup, :lookup
120
+ alias _create create
121
+ alias _lookup lookup
132
122
 
133
123
  def create(**options)
134
124
  unknown = options.keys - %i[mode fields debug projection]
135
- raise ArgumentError, "unknown keyword(s): #{unknown.join(', ')}" unless unknown.empty?
125
+ raise ArgumentError, "unknown keyword(s): #{unknown.join(", ")}" unless unknown.empty?
136
126
 
137
127
  mode = options.fetch(:mode, MODE_C)
138
128
  fields = options.fetch(:fields, nil)
139
129
  debug = options.fetch(:debug, false)
140
130
  projection = options.fetch(:projection, nil)
141
131
 
142
- unless fields.nil? || fields.is_a?(Array)
143
- raise ArgumentError, "fields must be an Array<String|Symbol> or nil"
144
- end
132
+ raise ArgumentError, "fields must be an Array<String|Symbol> or nil" unless fields.nil? || fields.is_a?(Array)
145
133
  if fields&.any? { !(_1.is_a?(String) || _1.is_a?(Symbol)) }
146
134
  raise ArgumentError, "fields must contain only String or Symbol values"
147
135
  end
148
- unless debug == true || debug == false
149
- raise ArgumentError, "debug must be true or false"
150
- end
136
+ raise ArgumentError, "debug must be true or false" unless [true, false].include?(debug)
151
137
 
152
- unless projection.nil?
153
- raise NotImplementedError, "projection is not supported yet"
154
- end
138
+ raise NotImplementedError, "projection is not supported yet" unless projection.nil?
155
139
 
156
140
  mode_str = Kabosu.__send__(:normalize_mode, mode)
157
141
  _create(mode_str, fields, debug)
158
142
  end
159
143
 
160
144
  def lookup(text)
161
- unless text.is_a?(String)
162
- raise ArgumentError, "text must be a String"
163
- end
145
+ raise ArgumentError, "text must be a String" unless text.is_a?(String)
146
+
164
147
  MorphemeList.new(_lookup(text))
165
148
  rescue RuntimeError => e
166
149
  raise LookupError.new(e.message), cause: e
@@ -170,12 +153,10 @@ module Kabosu
170
153
  # ── Tokenizer: wrap output in MorphemeList ──
171
154
 
172
155
  class Tokenizer
173
- alias_method :_tokenize, :tokenize
156
+ alias _tokenize tokenize
174
157
 
175
158
  def tokenize(text)
176
- unless text.is_a?(String)
177
- raise ArgumentError, "text must be a String"
178
- end
159
+ raise ArgumentError, "text must be a String" unless text.is_a?(String)
179
160
 
180
161
  batch = _tokenize(text)
181
162
  cost = batch.respond_to?(:internal_cost) ? batch.internal_cost : nil
@@ -186,12 +167,11 @@ module Kabosu
186
167
  end
187
168
 
188
169
  class Morpheme
189
- alias_method :_split, :split
170
+ alias _split split
190
171
 
191
172
  def split(mode: MODE_C, add_single: true)
192
- unless add_single == true || add_single == false
193
- raise ArgumentError, "add_single must be true or false"
194
- end
173
+ raise ArgumentError, "add_single must be true or false" unless [true, false].include?(add_single)
174
+
195
175
  mode_str = Kabosu.__send__(:normalize_mode, mode)
196
176
  MorphemeList.new(_split(mode_str, nil, add_single))
197
177
  rescue RuntimeError => e
@@ -200,29 +180,15 @@ module Kabosu
200
180
  end
201
181
 
202
182
  def self.split_sentences(text, limit: nil, with_checker: false, ranges: false, dictionary: nil)
203
- unless text.is_a?(String)
204
- raise ArgumentError, "text must be a String"
205
- end
206
- unless limit.nil? || limit.is_a?(Integer)
207
- raise ArgumentError, "limit must be an Integer or nil"
208
- end
209
- if limit && limit < 1
210
- raise ArgumentError, "limit must be greater than 0"
211
- end
212
- unless with_checker == true || with_checker == false
213
- raise ArgumentError, "with_checker must be true or false"
214
- end
215
- unless ranges == true || ranges == false
216
- raise ArgumentError, "ranges must be true or false"
217
- end
218
- unless dictionary.nil? || dictionary.is_a?(String)
219
- raise ArgumentError, "dictionary must be a String path or nil"
220
- end
183
+ raise ArgumentError, "text must be a String" unless text.is_a?(String)
184
+ raise ArgumentError, "limit must be an Integer or nil" unless limit.nil? || limit.is_a?(Integer)
185
+ raise ArgumentError, "limit must be greater than 0" if limit && limit < 1
186
+ raise ArgumentError, "with_checker must be true or false" unless [true, false].include?(with_checker)
187
+ raise ArgumentError, "ranges must be true or false" unless [true, false].include?(ranges)
188
+ raise ArgumentError, "dictionary must be a String path or nil" unless dictionary.nil? || dictionary.is_a?(String)
221
189
 
222
190
  dict_path = nil
223
- if with_checker
224
- dict_path = dictionary || Dictionary.path
225
- end
191
+ dict_path = dictionary || Dictionary.path if with_checker
226
192
 
227
193
  if ranges
228
194
  _split_sentences_with_ranges(text, limit, dict_path).map do |(start, finish, sentence)|
@@ -244,12 +210,8 @@ module Kabosu
244
210
  # Kabosu.tokenize("東京都に住んでいる", tokenizer: tok)
245
211
  #
246
212
  def self.tokenize(text, tokenizer:)
247
- unless text.is_a?(String)
248
- raise ArgumentError, "text must be a String"
249
- end
250
- unless tokenizer.is_a?(Tokenizer)
251
- raise ArgumentError, "tokenizer must be a Kabosu::Tokenizer"
252
- end
213
+ raise ArgumentError, "text must be a String" unless text.is_a?(String)
214
+ raise ArgumentError, "tokenizer must be a Kabosu::Tokenizer" unless tokenizer.is_a?(Tokenizer)
253
215
 
254
216
  batch = tokenizer.__send__(:_tokenize, text)
255
217
  cost = batch.respond_to?(:internal_cost) ? batch.internal_cost : nil
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kabosu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.10.2
4
+ version: 0.6.11.0.dev.20260627.a5a69e7
5
5
  platform: ruby
6
6
  authors:
7
7
  - davafons
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-31 00:00:00.000000000 Z
11
+ date: 2026-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -39,61 +39,75 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '2.3'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rake
42
+ name: benchmark
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '13.0'
61
+ version: '5.0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '13.0'
68
+ version: '5.0'
55
69
  - !ruby/object:Gem::Dependency
56
- name: rake-compiler
70
+ name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '1.2'
75
+ version: '13.0'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '1.2'
82
+ version: '13.0'
69
83
  - !ruby/object:Gem::Dependency
70
- name: benchmark
84
+ name: rake-compiler
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - ">="
87
+ - - "~>"
74
88
  - !ruby/object:Gem::Version
75
- version: '0'
89
+ version: '1.2'
76
90
  type: :development
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
- - - ">="
94
+ - - "~>"
81
95
  - !ruby/object:Gem::Version
82
- version: '0'
96
+ version: '1.2'
83
97
  - !ruby/object:Gem::Dependency
84
- name: minitest
98
+ name: rubocop
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '5.0'
103
+ version: '1.0'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '5.0'
110
+ version: '1.0'
97
111
  description: Kabosu provides Ruby bindings for sudachi.rs, a Rust implementation of
98
112
  the Sudachi Japanese morphological analyzer.
99
113
  email:
@@ -133,7 +147,8 @@ files:
133
147
  homepage: https://github.com/davafons/kabosu
134
148
  licenses:
135
149
  - Apache-2.0
136
- metadata: {}
150
+ metadata:
151
+ rubygems_mfa_required: 'true'
137
152
  post_install_message:
138
153
  rdoc_options: []
139
154
  require_paths: