wp2txt 1.1.3 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dockerignore +12 -0
- data/.github/workflows/ci.yml +13 -13
- data/.gitignore +14 -0
- data/CHANGELOG.md +284 -0
- data/DEVELOPMENT.md +415 -0
- data/DEVELOPMENT_ja.md +415 -0
- data/Dockerfile +19 -10
- data/Gemfile +2 -8
- data/README.md +259 -123
- data/README_ja.md +375 -0
- data/Rakefile +4 -0
- data/bin/wp2txt +863 -161
- data/lib/wp2txt/article.rb +98 -13
- data/lib/wp2txt/bz2_validator.rb +239 -0
- data/lib/wp2txt/category_cache.rb +313 -0
- data/lib/wp2txt/cli.rb +319 -0
- data/lib/wp2txt/cli_ui.rb +428 -0
- data/lib/wp2txt/config.rb +158 -0
- data/lib/wp2txt/constants.rb +134 -0
- data/lib/wp2txt/data/html_entities.json +2135 -0
- data/lib/wp2txt/data/language_metadata.json +4769 -0
- data/lib/wp2txt/data/language_tiers.json +59 -0
- data/lib/wp2txt/data/mediawiki_aliases.json +12366 -0
- data/lib/wp2txt/data/template_aliases.json +193 -0
- data/lib/wp2txt/data/wikipedia_entities.json +12 -0
- data/lib/wp2txt/extractor.rb +545 -0
- data/lib/wp2txt/file_utils.rb +91 -0
- data/lib/wp2txt/formatter.rb +352 -0
- data/lib/wp2txt/global_data_cache.rb +353 -0
- data/lib/wp2txt/index_cache.rb +258 -0
- data/lib/wp2txt/magic_words.rb +353 -0
- data/lib/wp2txt/memory_monitor.rb +236 -0
- data/lib/wp2txt/multistream.rb +1383 -0
- data/lib/wp2txt/output_writer.rb +182 -0
- data/lib/wp2txt/parser_functions.rb +606 -0
- data/lib/wp2txt/ractor_worker.rb +215 -0
- data/lib/wp2txt/regex.rb +396 -12
- data/lib/wp2txt/section_extractor.rb +354 -0
- data/lib/wp2txt/stream_processor.rb +271 -0
- data/lib/wp2txt/template_expander.rb +830 -0
- data/lib/wp2txt/text_processing.rb +337 -0
- data/lib/wp2txt/utils.rb +629 -270
- data/lib/wp2txt/version.rb +1 -1
- data/lib/wp2txt.rb +53 -26
- data/scripts/benchmark_regex.rb +161 -0
- data/scripts/fetch_html_entities.rb +94 -0
- data/scripts/fetch_language_metadata.rb +180 -0
- data/scripts/fetch_mediawiki_data.rb +334 -0
- data/scripts/fetch_template_data.rb +186 -0
- data/scripts/profile_memory.rb +139 -0
- data/spec/article_spec.rb +402 -0
- data/spec/auto_download_spec.rb +314 -0
- data/spec/bz2_validator_spec.rb +193 -0
- data/spec/category_cache_spec.rb +226 -0
- data/spec/category_fetcher_spec.rb +504 -0
- data/spec/cleanup_spec.rb +197 -0
- data/spec/cli_options_spec.rb +678 -0
- data/spec/cli_spec.rb +876 -0
- data/spec/config_spec.rb +194 -0
- data/spec/constants_spec.rb +138 -0
- data/spec/file_utils_spec.rb +170 -0
- data/spec/fixtures/samples.rb +181 -0
- data/spec/formatter_sections_spec.rb +382 -0
- data/spec/global_data_cache_spec.rb +186 -0
- data/spec/index_cache_spec.rb +210 -0
- data/spec/integration_spec.rb +543 -0
- data/spec/magic_words_spec.rb +261 -0
- data/spec/markers_spec.rb +476 -0
- data/spec/memory_monitor_spec.rb +192 -0
- data/spec/multistream_spec.rb +690 -0
- data/spec/output_writer_spec.rb +400 -0
- data/spec/parser_functions_spec.rb +455 -0
- data/spec/ractor_worker_spec.rb +197 -0
- data/spec/regex_spec.rb +281 -0
- data/spec/section_extractor_spec.rb +397 -0
- data/spec/spec_helper.rb +63 -0
- data/spec/stream_processor_spec.rb +579 -0
- data/spec/template_data_spec.rb +246 -0
- data/spec/template_expander_spec.rb +472 -0
- data/spec/template_processing_spec.rb +217 -0
- data/spec/text_processing_spec.rb +312 -0
- data/spec/utils_spec.rb +195 -16
- data/spec/wp2txt_spec.rb +510 -0
- data/wp2txt.gemspec +5 -3
- metadata +146 -18
- data/.rubocop.yml +0 -80
- data/data/output_samples/testdata_en.txt +0 -23002
- data/data/output_samples/testdata_en_category.txt +0 -132
- data/data/output_samples/testdata_en_summary.txt +0 -1376
- data/data/output_samples/testdata_ja.txt +0 -22774
- data/data/output_samples/testdata_ja_category.txt +0 -206
- data/data/output_samples/testdata_ja_summary.txt +0 -1560
- data/data/testdata_en.bz2 +0 -0
- data/data/testdata_ja.bz2 +0 -0
- data/image/screenshot.png +0 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pastel"
|
|
4
|
+
require "tty-spinner"
|
|
5
|
+
require "tty-progressbar"
|
|
6
|
+
require_relative "constants"
|
|
7
|
+
|
|
8
|
+
module Wp2txt
|
|
9
|
+
# CLI UI helper module for consistent styling
|
|
10
|
+
module CliUI
|
|
11
|
+
# Exit codes for CLI
|
|
12
|
+
EXIT_SUCCESS = 0
|
|
13
|
+
EXIT_ERROR = 1
|
|
14
|
+
EXIT_PARTIAL = 2 # Partial success (e.g., some articles not found)
|
|
15
|
+
|
|
16
|
+
# Icons for status indicators
|
|
17
|
+
ICONS = {
|
|
18
|
+
success: "✔",
|
|
19
|
+
error: "✖",
|
|
20
|
+
warning: "!",
|
|
21
|
+
info: "ℹ",
|
|
22
|
+
arrow: "→",
|
|
23
|
+
bullet: "•",
|
|
24
|
+
check: "✓",
|
|
25
|
+
cross: "✗",
|
|
26
|
+
star: "★"
|
|
27
|
+
}.freeze
|
|
28
|
+
|
|
29
|
+
# Configure UI settings.
#
# The memoized Pastel instance is dropped as well: `pastel` builds it with
# the color flag that is current at creation time, so an instance created
# before this call would otherwise keep the old setting.
#
# @param no_color [Boolean] Disable colors
# @param quiet [Boolean] Suppress progress output
# @return [Boolean] the quiet flag that was stored
def configure_ui(no_color: false, quiet: false)
  @pastel = nil
  @no_color = no_color || color_disabled_by_env?
  @quiet = quiet
end
|
|
36
|
+
|
|
37
|
+
# Check if color is disabled by environment.
#
# Follows the NO_COLOR convention (https://no-color.org/): the variable
# disables color when it is present and not an empty string, whatever its
# value. A "dumb" terminal also disables color.
#
# @return [Boolean]
def color_disabled_by_env?
  no_color = ENV["NO_COLOR"]
  (!no_color.nil? && !no_color.empty?) || ENV["TERM"] == "dumb"
end
|
|
43
|
+
|
|
44
|
+
# Whether quiet mode is on.
# Reports false until a quiet flag has been stored.
# @return [Boolean]
def quiet?
  return false unless @quiet

  @quiet
end
|
|
49
|
+
|
|
50
|
+
# Whether colored output is turned off.
# Reports false until a no-color flag has been stored.
# @return [Boolean]
def no_color?
  return false unless @no_color

  @no_color
end
|
|
55
|
+
|
|
56
|
+
# Lazily build the Pastel instance used for colors and reuse it afterwards.
# Coloring is enabled unless no_color? reports true at creation time.
def pastel
  return @pastel if @pastel

  @pastel = Pastel.new(enabled: !no_color?)
end
|
|
60
|
+
|
|
61
|
+
# Forget the memoized Pastel instance so the next `pastel` call rebuilds it
# with the current color setting (needed after configure_ui).
def reset_pastel!
  @pastel = nil
  nil
end
|
|
65
|
+
|
|
66
|
+
# Print a section header preceded by a blank line (skipped in quiet mode).
# With both step and total_steps the title is prefixed by a dim "[step/total]"
# marker; otherwise it is wrapped in a "═══ … ═══" rule.
# @param title [String] Section title
# @param step [Integer, nil] Current step number
# @param total_steps [Integer, nil] Total number of steps
def print_header(title, step: nil, total_steps: nil)
  return if quiet?

  puts
  heading =
    if step && total_steps
      "#{pastel.dim("[#{step}/#{total_steps}]")} #{pastel.cyan.bold(title)}"
    else
      pastel.cyan.bold("═══ #{title} ═══")
    end
  puts heading
end
|
|
81
|
+
|
|
82
|
+
# Print a bold sub-header preceded by a blank line (skipped in quiet mode).
# @param title [String] Sub-header title
def print_subheader(title)
  return if quiet?

  rule_text = "─── #{title} ───"
  puts
  puts pastel.bold(rule_text)
end
|
|
90
|
+
|
|
91
|
+
# Print a "key: value" info line (skipped in quiet mode).
# The key is interpolated rather than concatenated, so symbols and other
# non-String labels are accepted as well as strings.
# @param key [String, Symbol] Label
# @param value [Object] Value (rendered with to_s)
# @param indent [Integer] Indentation level
def print_info(key, value, indent: 0)
  return if quiet?

  prefix = " " * indent
  puts "#{prefix}#{pastel.dim("#{key}:")} #{value}"
end
|
|
101
|
+
|
|
102
|
+
# Print a message prefixed with the green success icon (skipped in quiet mode).
# @param message [String] Message
def print_success(message)
  return if quiet?

  marker = pastel.green(ICONS[:success])
  puts "#{marker} #{message}"
end
|
|
109
|
+
|
|
110
|
+
# Write an error message to standard error, prefixed with the red error icon.
# There is no quiet-mode check: errors are always shown.
# @param message [String] Message
def print_error(message)
  marker = pastel.red(ICONS[:error])
  $stderr.puts "#{marker} #{message}"
end
|
|
116
|
+
|
|
117
|
+
# Write a warning message to standard error, prefixed with the yellow
# warning icon. There is no quiet-mode check: warnings are always shown.
# @param message [String] Message
def print_warning(message)
  marker = pastel.yellow(ICONS[:warning])
  $stderr.puts "#{marker} #{message}"
end
|
|
123
|
+
|
|
124
|
+
# Print a message prefixed with the blue info icon (skipped in quiet mode).
# @param message [String] Message
def print_info_message(message)
  return if quiet?

  marker = pastel.blue(ICONS[:info])
  puts "#{marker} #{message}"
end
|
|
131
|
+
|
|
132
|
+
# Print one list entry with a colored status marker (skipped in quiet mode).
# Any status other than :success, :error or :warning gets the dim bullet.
# @param text [String] Item text
# @param status [Symbol] :success, :error, :warning, :pending
# @param indent [Integer] Indentation level
def print_list_item(text, status: :pending, indent: 1)
  return if quiet?

  markers = {
    success: %i[green check],
    error: %i[red cross],
    warning: %i[yellow warning]
  }
  color, icon_key = markers.fetch(status, %i[dim bullet])
  marker = pastel.public_send(color, ICONS[icon_key])
  puts "#{" " * indent}#{marker} #{text}"
end
|
|
148
|
+
|
|
149
|
+
# Print a completion summary box. There is no quiet-mode check: the summary
# is the final result and is always shown.
#
# The box is at least 40 columns wide and grows to fit the longest line, so
# long values (for example output paths) no longer push the right border out
# of alignment. Width is measured in characters, not terminal cells, so
# double-width characters can still misalign the border.
#
# @param title [String] Summary title
# @param stats [Hash] Statistics to display, one "key: value" row each
# @param status [Symbol] :success, :warning, :error (anything else is white)
def print_summary(title, stats, status: :success)
  color = { success: :green, warning: :yellow, error: :red }.fetch(status, :white)

  title_line = " #{title}"
  content_lines = stats.map { |k, v| " #{k}: #{v}" }

  # One extra column keeps a space between the longest line and the border.
  width = [40, *[title_line, *content_lines].map { |line| line.length + 1 }].max
  rule = "─" * width
  side = pastel.public_send(color, "│")

  puts
  puts pastel.public_send(color, "┌#{rule}┐")
  puts side + pastel.bold(title_line.ljust(width)) + side
  puts pastel.public_send(color, "├#{rule}┤")
  content_lines.each { |line| puts side + line.ljust(width) + side }
  puts pastel.public_send(color, "└#{rule}┘")
end
|
|
179
|
+
|
|
180
|
+
# Print a dim "Completed in …" line (skipped in quiet mode).
# @param seconds [Float] Elapsed seconds
def print_elapsed_time(seconds)
  return if quiet?

  puts pastel.dim("Completed in #{format_duration(seconds)}")
end
|
|
188
|
+
|
|
189
|
+
# Format duration in human-readable form.
#
# Values are rounded before being split into units, so a rounded component
# can never reach 60 (previously 119.6 produced "1m 60s" and 59.96 produced
# "60.0s").
#
# @param seconds [Numeric] Duration in seconds
# @return [String] "12.3s" below one minute, "1m 30s" below one hour,
#   "1h 2m" otherwise
def format_duration(seconds)
  tenths = seconds.round(1)
  return "#{tenths}s" if tenths < 60

  total = seconds.round
  if total < 3600
    mins, secs = total.divmod(60)
    "#{mins}m #{secs}s"
  else
    hours, rest = total.divmod(3600)
    "#{hours}h #{rest / 60}m"
  end
end
|
|
205
|
+
|
|
206
|
+
# Render a byte count as a human-readable size.
# Thin wrapper that delegates to Wp2txt.format_file_size.
# @param bytes [Integer] Size in bytes
# @return [String] Formatted size
def format_size(bytes)
  Wp2txt.format_file_size(bytes)
end
|
|
212
|
+
|
|
213
|
+
# Format ETA (estimated time remaining).
#
# @param seconds [Numeric, nil] Remaining seconds
# @return [String] "MM:SS" below one hour, "HH:MM:SS" up to 99 hours,
#   ">99h" beyond that, or "--:--:--" when the input is nil, non-numeric,
#   negative, NaN or infinite
def format_eta(seconds)
  # Non-numeric input used to raise NoMethodError; treat it as "unknown".
  return "--:--:--" unless seconds.is_a?(Numeric) && seconds.real?
  return "--:--:--" if seconds.negative? || !seconds.finite?

  hours, rest = seconds.to_i.divmod(3600)
  mins, secs = rest.divmod(60)

  if hours > 99
    ">99h"
  elsif hours.positive?
    format("%02d:%02d:%02d", hours, mins, secs)
  else
    format("%02d:%02d", mins, secs)
  end
end
|
|
232
|
+
|
|
233
|
+
# Calculate ETA based on current progress.
#
# Returns nil whenever a meaningful rate cannot be derived: missing or
# non-positive counts, progress beyond the total, or missing / non-positive
# elapsed time. Zero elapsed time used to yield an infinite rate and a
# misleading ETA of 0.0.
#
# @param processed [Integer] Items processed so far
# @param total [Integer, nil] Total items to process
# @param elapsed_seconds [Numeric] Time elapsed in seconds
# @return [Float, nil] Estimated seconds remaining, or nil if cannot calculate
def calculate_eta(processed, total, elapsed_seconds)
  return nil if processed.nil? || total.nil? || elapsed_seconds.nil?
  return nil unless processed.positive? && total.positive? && elapsed_seconds.positive?
  return nil if processed > total

  rate = processed.to_f / elapsed_seconds
  # An infinite elapsed time collapses the rate to zero.
  return nil if rate.zero?

  (total - processed) / rate
end
|
|
248
|
+
|
|
249
|
+
# Estimate how many articles an input file holds.
# Prefers a line count of the multistream index when one is found and yields
# a positive count; otherwise falls back to an estimate from the file size.
# @param input_path [String] Path to input file
# @return [Integer, nil] Estimated total articles, or nil if cannot estimate
def estimate_total_articles(input_path)
  return nil unless input_path

  index_path = find_multistream_index(input_path)
  indexed = count_articles_from_index(index_path) if index_path && File.exist?(index_path)
  return indexed if indexed && indexed > 0

  # No usable index: derive a rough figure from the size of the input file.
  estimate_from_file_size(input_path)
end
|
|
267
|
+
|
|
268
|
+
# Find multistream index file for a given input file.
#
# Only the file name is inspected, and the direct candidate is accepted only
# when the "multistream.xml.bz2" suffix was actually rewritten. Previously a
# path that merely contained "multistream" (for example in a directory name)
# but did not end in that suffix was returned as its own index.
#
# @param input_path [String] Path to multistream file
# @return [String, nil] Path to index file, or nil if not found
def find_multistream_index(input_path)
  basename = File.basename(input_path)
  return nil unless basename.include?("multistream")

  # Pattern: *-multistream.xml.bz2 → *-multistream-index.txt.bz2
  direct = input_path.sub(/multistream\.xml\.bz2\z/, "multistream-index.txt.bz2")
  return direct if direct != input_path && File.exist?(direct)

  # Alternate pattern: rebuild the index name from the wiki name and dump date.
  parts = /\A(?<wiki>\w+wiki)-(?<date>\d+)-/.match(basename)
  return nil unless parts

  alt_path = File.join(
    File.dirname(input_path),
    "#{parts[:wiki]}-#{parts[:date]}-pages-articles-multistream-index.txt.bz2"
  )
  File.exist?(alt_path) ? alt_path : nil
end
|
|
292
|
+
|
|
293
|
+
# Count articles from multistream index file (streams the file line by line
# instead of loading it).
#
# ".bz2" files are decompressed through the external `bzcat` command. Its
# exit status is checked so that a failed or interrupted decompression
# reports nil rather than a truncated count, and its stderr is discarded so
# error text does not leak into the terminal UI.
#
# @param index_path [String] Path to index file
# @return [Integer, nil] Number of lines in the index, or nil if it cannot
#   be counted (missing file, bzcat unavailable or failing, ...)
def count_articles_from_index(index_path)
  return File.foreach(index_path).count unless index_path.end_with?(".bz2")

  count = IO.popen(["bzcat", index_path], "r", err: File::NULL) { |io| io.each_line.count }
  Process.last_status.success? ? count : nil
rescue StandardError
  # Best effort: callers fall back to a size-based estimate on nil.
  nil
end
|
|
312
|
+
|
|
313
|
+
# Estimate article count from file size.
#
# @param input_path [String] Path to input file
# @param articles_per_mb [Numeric] Assumed articles per MiB of input; the
#   default of 200 is the value this method always used. Callers that know
#   the density of their dump can pass a better figure.
# @return [Integer, nil] Estimated article count, or nil when the path is
#   not a regular file (directories are rejected too, since their size says
#   nothing about a dump)
def estimate_from_file_size(input_path, articles_per_mb: 200)
  return nil unless File.file?(input_path)

  size_mb = File.size(input_path) / (1024.0 * 1024)
  (size_mb * articles_per_mb).to_i
end
|
|
328
|
+
|
|
329
|
+
# Build a spinner with the shared styling (dots format, hidden cursor).
# In quiet mode a NullSpinner is returned instead.
# @param message [String] Spinner message
# @return [TTY::Spinner, NullSpinner] Configured spinner or null spinner in quiet mode
def create_spinner(message)
  if quiet?
    NullSpinner.new
  else
    template = "[:spinner] #{message}"
    TTY::Spinner.new(template, format: :dots, hide_cursor: true)
  end
end
|
|
341
|
+
|
|
342
|
+
# Build a progress bar with the shared styling (block bar, 30 columns,
# hidden cursor). In quiet mode a NullProgressBar is returned instead.
# @param message [String] Progress message
# @param total [Integer] Total count
# @return [TTY::ProgressBar, NullProgressBar] Configured progress bar or null in quiet mode
def create_progress_bar(message, total)
  if quiet?
    NullProgressBar.new
  else
    template = "#{message} [:bar] :percent (:current/:total) :eta"
    options = { total: total, bar_format: :block, width: 30, hide_cursor: true }
    TTY::ProgressBar.new(template, **options)
  end
end
|
|
357
|
+
|
|
358
|
+
# Build a progress bar for a download (block bar, 25 columns, hidden cursor).
# In quiet mode a NullProgressBar is returned instead.
# @param filename [String] File being downloaded
# @param total_bytes [Integer] Total size in bytes
# @return [TTY::ProgressBar, NullProgressBar] Configured progress bar or null in quiet mode
def create_download_bar(filename, total_bytes)
  if quiet?
    NullProgressBar.new
  else
    template = " #{filename} [:bar] :percent (:eta)"
    # NOTE(review): `unknown:` looks like the indeterminate-progress marker of
    # TTY::ProgressBar rather than a label — confirm this text renders as intended.
    unknown_text = "#{format_size(total_bytes)} (size unknown)"
    TTY::ProgressBar.new(
      template,
      total: total_bytes,
      bar_format: :block,
      width: 25,
      hide_cursor: true,
      unknown: unknown_text
    )
  end
end
|
|
375
|
+
|
|
376
|
+
# Ask a yes/no question on the terminal.
# When standard input is not a TTY the default is returned without
# prompting. An empty answer or end of input also yields the default; only
# "y" or "yes" (case-insensitive) count as agreement.
# @param message [String] Prompt message
# @param default [Boolean] Default response
# @return [Boolean] User response
def confirm?(message, default: false)
  return default unless $stdin.tty?

  hint = default ? "[Y/n]" : "[y/N]"
  print "#{message} #{hint}: "

  # nil (end of input) becomes "", which is handled like an empty answer.
  answer = $stdin.gets.to_s.strip.downcase
  return default if answer.empty?

  answer == "y" || answer == "yes"
end
|
|
391
|
+
|
|
392
|
+
# Print a banner naming the current mode, framed by two 50-column rules and
# followed by one info line per detail (skipped in quiet mode).
# @param mode [String] Mode name
# @param details [Hash] Mode details
def print_mode_banner(mode, details = {})
  return if quiet?

  rule = pastel.cyan.bold("═" * 50)
  puts
  puts rule
  puts pastel.cyan.bold(" #{mode}")
  puts rule
  puts

  details.each { |label, detail| print_info(label.to_s, detail.to_s) }
  puts
end
|
|
409
|
+
end
|
|
410
|
+
|
|
411
|
+
# Stand-in for a spinner in quiet mode: every method is a no-op returning nil.
class NullSpinner
  def auto_spin
    nil
  end

  def success(_msg = nil)
    nil
  end

  def error(_msg = nil)
    nil
  end

  def stop
    nil
  end

  def update(**_options)
    nil
  end
end
|
|
419
|
+
|
|
420
|
+
# Stand-in for a progress bar in quiet mode: every method is a no-op.
class NullProgressBar
  def advance(_count = 1)
    nil
  end

  def finish
    nil
  end

  def current=(_value)
    nil
  end

  def start
    nil
  end

  def stop
    nil
  end
end
|
|
428
|
+
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "yaml"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module Wp2txt
|
|
7
|
+
# Configuration management for wp2txt.
# Loads and saves settings from ~/.wp2txt/config.yml
class Config
  # Default configuration file path
  DEFAULT_CONFIG_PATH = File.expand_path("~/.wp2txt/config.yml")
  # Default cache directory
  DEFAULT_CACHE_DIR = File.expand_path("~/.wp2txt/cache")

  # Validation ranges
  DUMP_EXPIRY_RANGE = (1..365)
  CATEGORY_EXPIRY_RANGE = (1..90)
  DEPTH_RANGE = (0..10)
  VALID_FORMATS = %w[text json].freeze

  # Default values
  DEFAULTS = {
    dump_expiry_days: 30,
    category_expiry_days: 7,
    cache_directory: DEFAULT_CACHE_DIR,
    default_format: "text",
    default_depth: 0
  }.freeze

  attr_reader :dump_expiry_days, :category_expiry_days, :cache_directory,
              :default_format, :default_depth

  # Build a configuration. Numeric settings are coerced with to_i and clamped
  # to their validation range; an empty cache directory or an unknown format
  # falls back to the default.
  def initialize(
    dump_expiry_days: DEFAULTS[:dump_expiry_days],
    category_expiry_days: DEFAULTS[:category_expiry_days],
    cache_directory: DEFAULTS[:cache_directory],
    default_format: DEFAULTS[:default_format],
    default_depth: DEFAULTS[:default_depth]
  )
    @dump_expiry_days = clamp(dump_expiry_days.to_i, DUMP_EXPIRY_RANGE)
    @category_expiry_days = clamp(category_expiry_days.to_i, CATEGORY_EXPIRY_RANGE)
    @cache_directory = cache_directory.to_s.empty? ? DEFAULT_CACHE_DIR : cache_directory.to_s
    @default_format = validate_format(default_format.to_s)
    @default_depth = clamp(default_depth.to_i, DEPTH_RANGE)
  end

  # Load configuration from file
  # @param path [String] Path to config file (default: ~/.wp2txt/config.yml)
  # @return [Config] Configuration object; defaults when the file is missing
  #   or cannot be read or parsed
  def self.load(path = default_path)
    return new unless File.exist?(path)

    begin
      data = YAML.safe_load(File.read(path), symbolize_names: true) || {}
      from_hash(data)
    rescue StandardError
      # Best effort: an unreadable or malformed file yields the defaults.
      # (Psych errors are StandardError subclasses, so one clause suffices.)
      new
    end
  end

  # Create Config from hash
  #
  # Sections that are missing or not hashes (e.g. `cache: 5`) are treated as
  # empty, so one malformed section no longer discards the other one.
  #
  # @param data [Hash] Configuration hash with symbol keys
  # @return [Config] Configuration object
  def self.from_hash(data)
    cache = hash_section(data, :cache)
    defaults = hash_section(data, :defaults)

    new(
      dump_expiry_days: cache[:dump_expiry_days] || DEFAULTS[:dump_expiry_days],
      category_expiry_days: cache[:category_expiry_days] || DEFAULTS[:category_expiry_days],
      cache_directory: cache[:directory] || DEFAULTS[:cache_directory],
      default_format: defaults[:format] || DEFAULTS[:default_format],
      default_depth: defaults[:depth] || DEFAULTS[:default_depth]
    )
  end

  # Fetch a section from the parsed config, or {} when data or the section
  # is not a Hash.
  def self.hash_section(data, key)
    section = data.is_a?(Hash) ? data[key] : nil
    section.is_a?(Hash) ? section : {}
  end
  private_class_method :hash_section

  # Default configuration file path
  # @return [String] Path to default config file
  def self.default_path
    DEFAULT_CONFIG_PATH
  end

  # Create default configuration file
  # @param path [String] Path to config file
  # @param force [Boolean] Overwrite existing file
  # @return [Boolean] True if file was created
  def self.create_default(path = default_path, force: false)
    return false if File.exist?(path) && !force

    new.save(path)
    true
  end

  # Save configuration to file, creating the parent directory if needed
  # @param path [String] Path to config file
  def save(path = self.class.default_path)
    FileUtils.mkdir_p(File.dirname(path))
    File.write(path, generate_yaml)
  end

  # Convert to hash representation
  # @return [Hash] Configuration as hash
  def to_h
    {
      cache: {
        dump_expiry_days: @dump_expiry_days,
        category_expiry_days: @category_expiry_days,
        directory: @cache_directory
      },
      defaults: {
        format: @default_format,
        depth: @default_depth
      }
    }
  end

  private

  # Clamp value to range
  def clamp(value, range)
    value.clamp(range.begin, range.end)
  end

  # Validate format string
  def validate_format(format)
    VALID_FORMATS.include?(format) ? format : DEFAULTS[:default_format]
  end

  # Render a string so it survives a YAML round trip.
  # The text is emitted unchanged when YAML reads it back as the same string;
  # otherwise (comment markers, ": ", leading indicators, values YAML would
  # type as numbers/booleans/dates, ...) it is single-quoted, where the only
  # escape is a doubled single quote. Embedded line breaks are not preserved
  # by this style.
  def yaml_scalar(value)
    text = value.to_s
    plain_ok =
      begin
        YAML.safe_load("v: #{text}") == { "v" => text }
      rescue StandardError
        false
      end
    plain_ok ? text : "'#{text.gsub("'", "''")}'"
  end

  # Generate YAML content with comments
  def generate_yaml
    <<~YAML
      # WP2TXT Configuration File
      # Location: ~/.wp2txt/config.yml

      cache:
        # Number of days before dump files are considered stale (1-365)
        dump_expiry_days: #{@dump_expiry_days}

        # Number of days before category cache expires (1-90)
        category_expiry_days: #{@category_expiry_days}

        # Cache directory for downloaded dumps
        directory: #{yaml_scalar(@cache_directory)}

      defaults:
        # Default output format: text or json
        format: #{@default_format}

        # Default subcategory recursion depth (0-10)
        depth: #{@default_depth}
    YAML
  end
end
|
|
158
|
+
end
|