wp2txt 1.1.3 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dockerignore +12 -0
- data/.github/workflows/ci.yml +13 -13
- data/.gitignore +14 -0
- data/CHANGELOG.md +284 -0
- data/DEVELOPMENT.md +415 -0
- data/DEVELOPMENT_ja.md +415 -0
- data/Dockerfile +19 -10
- data/Gemfile +2 -8
- data/README.md +259 -123
- data/README_ja.md +375 -0
- data/Rakefile +4 -0
- data/bin/wp2txt +863 -161
- data/lib/wp2txt/article.rb +98 -13
- data/lib/wp2txt/bz2_validator.rb +239 -0
- data/lib/wp2txt/category_cache.rb +313 -0
- data/lib/wp2txt/cli.rb +319 -0
- data/lib/wp2txt/cli_ui.rb +428 -0
- data/lib/wp2txt/config.rb +158 -0
- data/lib/wp2txt/constants.rb +134 -0
- data/lib/wp2txt/data/html_entities.json +2135 -0
- data/lib/wp2txt/data/language_metadata.json +4769 -0
- data/lib/wp2txt/data/language_tiers.json +59 -0
- data/lib/wp2txt/data/mediawiki_aliases.json +12366 -0
- data/lib/wp2txt/data/template_aliases.json +193 -0
- data/lib/wp2txt/data/wikipedia_entities.json +12 -0
- data/lib/wp2txt/extractor.rb +545 -0
- data/lib/wp2txt/file_utils.rb +91 -0
- data/lib/wp2txt/formatter.rb +352 -0
- data/lib/wp2txt/global_data_cache.rb +353 -0
- data/lib/wp2txt/index_cache.rb +258 -0
- data/lib/wp2txt/magic_words.rb +353 -0
- data/lib/wp2txt/memory_monitor.rb +236 -0
- data/lib/wp2txt/multistream.rb +1383 -0
- data/lib/wp2txt/output_writer.rb +182 -0
- data/lib/wp2txt/parser_functions.rb +606 -0
- data/lib/wp2txt/ractor_worker.rb +215 -0
- data/lib/wp2txt/regex.rb +396 -12
- data/lib/wp2txt/section_extractor.rb +354 -0
- data/lib/wp2txt/stream_processor.rb +271 -0
- data/lib/wp2txt/template_expander.rb +830 -0
- data/lib/wp2txt/text_processing.rb +337 -0
- data/lib/wp2txt/utils.rb +629 -270
- data/lib/wp2txt/version.rb +1 -1
- data/lib/wp2txt.rb +53 -26
- data/scripts/benchmark_regex.rb +161 -0
- data/scripts/fetch_html_entities.rb +94 -0
- data/scripts/fetch_language_metadata.rb +180 -0
- data/scripts/fetch_mediawiki_data.rb +334 -0
- data/scripts/fetch_template_data.rb +186 -0
- data/scripts/profile_memory.rb +139 -0
- data/spec/article_spec.rb +402 -0
- data/spec/auto_download_spec.rb +314 -0
- data/spec/bz2_validator_spec.rb +193 -0
- data/spec/category_cache_spec.rb +226 -0
- data/spec/category_fetcher_spec.rb +504 -0
- data/spec/cleanup_spec.rb +197 -0
- data/spec/cli_options_spec.rb +678 -0
- data/spec/cli_spec.rb +876 -0
- data/spec/config_spec.rb +194 -0
- data/spec/constants_spec.rb +138 -0
- data/spec/file_utils_spec.rb +170 -0
- data/spec/fixtures/samples.rb +181 -0
- data/spec/formatter_sections_spec.rb +382 -0
- data/spec/global_data_cache_spec.rb +186 -0
- data/spec/index_cache_spec.rb +210 -0
- data/spec/integration_spec.rb +543 -0
- data/spec/magic_words_spec.rb +261 -0
- data/spec/markers_spec.rb +476 -0
- data/spec/memory_monitor_spec.rb +192 -0
- data/spec/multistream_spec.rb +690 -0
- data/spec/output_writer_spec.rb +400 -0
- data/spec/parser_functions_spec.rb +455 -0
- data/spec/ractor_worker_spec.rb +197 -0
- data/spec/regex_spec.rb +281 -0
- data/spec/section_extractor_spec.rb +397 -0
- data/spec/spec_helper.rb +63 -0
- data/spec/stream_processor_spec.rb +579 -0
- data/spec/template_data_spec.rb +246 -0
- data/spec/template_expander_spec.rb +472 -0
- data/spec/template_processing_spec.rb +217 -0
- data/spec/text_processing_spec.rb +312 -0
- data/spec/utils_spec.rb +195 -16
- data/spec/wp2txt_spec.rb +510 -0
- data/wp2txt.gemspec +5 -3
- metadata +146 -18
- data/.rubocop.yml +0 -80
- data/data/output_samples/testdata_en.txt +0 -23002
- data/data/output_samples/testdata_en_category.txt +0 -132
- data/data/output_samples/testdata_en_summary.txt +0 -1376
- data/data/output_samples/testdata_ja.txt +0 -22774
- data/data/output_samples/testdata_ja_category.txt +0 -206
- data/data/output_samples/testdata_ja_summary.txt +0 -1560
- data/data/testdata_en.bz2 +0 -0
- data/data/testdata_ja.bz2 +0 -0
- data/image/screenshot.png +0 -0
|
@@ -0,0 +1,830 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wp2txt
|
|
4
|
+
# Expands common MediaWiki templates to their text representation
|
|
5
|
+
# Handles date templates, convert templates, and other common patterns
|
|
6
|
+
class TemplateExpander
|
|
7
|
+
MONTH_NAMES = %w[
|
|
8
|
+
January February March April May June
|
|
9
|
+
July August September October November December
|
|
10
|
+
].freeze
|
|
11
|
+
|
|
12
|
+
# Unit conversion factors
|
|
13
|
+
CONVERSIONS = {
|
|
14
|
+
# Length
|
|
15
|
+
["km", "mi"] => 0.621371,
|
|
16
|
+
["mi", "km"] => 1.60934,
|
|
17
|
+
["m", "ft"] => 3.28084,
|
|
18
|
+
["ft", "m"] => 0.3048,
|
|
19
|
+
["cm", "in"] => 0.393701,
|
|
20
|
+
["in", "cm"] => 2.54,
|
|
21
|
+
["mm", "in"] => 0.0393701,
|
|
22
|
+
["in", "mm"] => 25.4,
|
|
23
|
+
["yd", "m"] => 0.9144,
|
|
24
|
+
["m", "yd"] => 1.09361,
|
|
25
|
+
# Weight
|
|
26
|
+
["kg", "lb"] => 2.20462,
|
|
27
|
+
["lb", "kg"] => 0.453592,
|
|
28
|
+
["g", "oz"] => 0.035274,
|
|
29
|
+
["oz", "g"] => 28.3495,
|
|
30
|
+
["t", "lb"] => 2204.62,
|
|
31
|
+
["lb", "t"] => 0.000453592,
|
|
32
|
+
# Temperature (special handling)
|
|
33
|
+
["C", "F"] => :celsius_to_fahrenheit,
|
|
34
|
+
["°C", "°F"] => :celsius_to_fahrenheit,
|
|
35
|
+
["F", "C"] => :fahrenheit_to_celsius,
|
|
36
|
+
["°F", "°C"] => :fahrenheit_to_celsius,
|
|
37
|
+
# Area
|
|
38
|
+
["km2", "sqmi"] => 0.386102,
|
|
39
|
+
["sqmi", "km2"] => 2.58999,
|
|
40
|
+
["ha", "acre"] => 2.47105,
|
|
41
|
+
["acre", "ha"] => 0.404686,
|
|
42
|
+
["m2", "sqft"] => 10.7639,
|
|
43
|
+
["sqft", "m2"] => 0.092903,
|
|
44
|
+
# Speed
|
|
45
|
+
["km/h", "mph"] => 0.621371,
|
|
46
|
+
["mph", "km/h"] => 1.60934,
|
|
47
|
+
["m/s", "km/h"] => 3.6,
|
|
48
|
+
["km/h", "m/s"] => 0.277778,
|
|
49
|
+
# Volume
|
|
50
|
+
["l", "gal"] => 0.264172,
|
|
51
|
+
["gal", "l"] => 3.78541,
|
|
52
|
+
["ml", "floz"] => 0.033814,
|
|
53
|
+
["floz", "ml"] => 29.5735
|
|
54
|
+
}.freeze
|
|
55
|
+
|
|
56
|
+
# Unit display names
|
|
57
|
+
UNIT_DISPLAY = {
|
|
58
|
+
"km" => "km",
|
|
59
|
+
"mi" => "mi",
|
|
60
|
+
"m" => "m",
|
|
61
|
+
"ft" => "ft",
|
|
62
|
+
"cm" => "cm",
|
|
63
|
+
"in" => "in",
|
|
64
|
+
"mm" => "mm",
|
|
65
|
+
"yd" => "yd",
|
|
66
|
+
"kg" => "kg",
|
|
67
|
+
"lb" => "lb",
|
|
68
|
+
"g" => "g",
|
|
69
|
+
"oz" => "oz",
|
|
70
|
+
"t" => "t",
|
|
71
|
+
"C" => "°C",
|
|
72
|
+
"°C" => "°C",
|
|
73
|
+
"F" => "°F",
|
|
74
|
+
"°F" => "°F",
|
|
75
|
+
"km2" => "km²",
|
|
76
|
+
"sqmi" => "sq mi",
|
|
77
|
+
"ha" => "ha",
|
|
78
|
+
"acre" => "acres",
|
|
79
|
+
"m2" => "m²",
|
|
80
|
+
"sqft" => "sq ft",
|
|
81
|
+
"km/h" => "km/h",
|
|
82
|
+
"mph" => "mph",
|
|
83
|
+
"m/s" => "m/s",
|
|
84
|
+
"l" => "L",
|
|
85
|
+
"gal" => "gal",
|
|
86
|
+
"ml" => "mL",
|
|
87
|
+
"floz" => "fl oz"
|
|
88
|
+
}.freeze
|
|
89
|
+
|
|
90
|
+
# Creates an expander.
#
# reference_date   - object used as "now" for age / time-ago templates;
#                    falls back to the current time when not given.
# preserve_unknown - when truthy, unrecognised templates are re-emitted
#                    verbatim instead of being removed.
def initialize(reference_date: nil, preserve_unknown: false)
  @preserve_unknown = preserve_unknown
  @reference_date = reference_date
  @reference_date ||= Time.now
end
|
|
94
|
+
|
|
95
|
+
# Expands the templates found in +text+ and returns the resulting string.
#
# nil, empty strings and strings without "{{" are returned untouched (the
# very same object). Otherwise a copy is rewritten by repeated calls to
# expand_templates_single_pass until no "{{" remains, a pass changes
# nothing, or ten passes have run (a guard against runaway input).
def expand(text)
  return text if text.nil? || text.empty? || !text.include?("{{")

  current = text.dup

  10.times do
    break unless current.include?("{{")

    rewritten = expand_templates_single_pass(current)
    # A pass that changes nothing means only unexpandable markup is left.
    break if rewritten == current

    current = rewritten
  end

  current
end
|
|
117
|
+
|
|
118
|
+
private
|
|
119
|
+
|
|
120
|
+
# Performs one left-to-right sweep over +text+, replacing each top-level
# "{{...}}" span with the result of expand_single_template.
#
# Text outside templates is copied through unchanged. If an opening "{{"
# has no matching "}}" (per find_template_end), the rest of the input is
# emitted verbatim and the sweep stops.
def expand_templates_single_pass(text)
  output = +""
  cursor = 0

  while cursor < text.length
    open_at = text.index("{{", cursor)
    # No more templates: flush the tail.
    return output << text[cursor..] if open_at.nil?

    output << text[cursor...open_at]

    close_at = find_template_end(text, open_at + 2)
    # Unbalanced braces: keep the remainder as plain text.
    return output << text[open_at..] if close_at.nil?

    output << expand_single_template(text[(open_at + 2)...close_at])
    cursor = close_at + 2
  end

  output
end
|
|
153
|
+
|
|
154
|
+
# Locates the "}}" that closes a template whose body starts at +start_pos+
# (the index just after the opening "{{").
#
# Nested "{{"/"}}" pairs are tracked with a depth counter. Returns the index
# of the first character of the closing "}}", or nil when the braces never
# balance.
def find_template_end(text, start_pos)
  nesting = 1
  cursor = start_pos

  # Jump from one brace pair to the next instead of walking every character.
  while (hit = text.index(/\{\{|\}\}/, cursor))
    if text[hit] == "{"
      nesting += 1
    else
      nesting -= 1
      return hit if nesting.zero?
    end
    cursor = hit + 2
  end

  nil
end
|
|
173
|
+
|
|
174
|
+
# Expands one template given its inner +content+ (the text between "{{" and
# "}}"), e.g. "convert|5|km|mi".
#
# The template name is matched case-insensitively after stripping
# whitespace. Recognised templates are delegated to the matching helper;
# "lang-xx" names go to expand_lang_xx. Anything else is re-emitted as
# "{{content}}" when @preserve_unknown is truthy, otherwise replaced by "".
# Returns "" when the content has no parts at all.
def expand_single_template(content)
  parts = split_template_parts(content)
  return "" if parts.empty?

  template_name = parts[0].strip.downcase
  params = parse_template_params(parts[1..])

  case template_name
  # Date templates
  when "birth date", "birthdate", "death date", "deathdate",
       "start date", "startdate", "end date", "enddate"
    format_date(params, style: :mdy)
  when "birth date and age", "birthdate and age"
    format_date_with_age(params, style: :mdy, age_label: "age")
  when "death date and age", "deathdate and age"
    format_death_date_with_age(params)
  when "date"
    format_simple_date(params)

  # Age templates ("age" used to be listed in two clauses; the second one
  # could never match it, so all three names now share a single clause)
  when "age", "birth year and age", "death year and age"
    calculate_age(params)
  when "age in years"
    calculate_age_between_dates(params)
  when "age in days"
    calculate_days_between(params)
  when "age in years and days"
    calculate_age_years_and_days(params)
  when "time ago"
    format_time_ago(params)

  # Convert template
  when "convert", "cvt"
    expand_convert(params)

  # Common templates
  when "circa", "c."
    expand_circa(params)
  when "floruit", "fl."
    expand_floruit(params)
  when "reign", "r."
    expand_reign(params)
  when "marriage", "married"
    expand_marriage(params)
  when "played years"
    expand_year_range(params)

  # Coordinate template
  when "coord", "coordinate", "coordinates"
    expand_coord(params)

  # Language templates
  when "lang"
    expand_lang(params)
  when "transl"
    expand_transl(params)
  when "nihongo"
    expand_nihongo(params)

  # Formatting templates: keep only the wrapped text
  when "nowrap", "nobr", "small", "smaller", "em", "bold", "strong",
       "abbr", "abbrlink", "sub", "sup"
    params[:positional][0] || ""
  when "blockquote", "quote", "cquote", "quotation"
    expand_blockquote(params)
  when "frac", "fraction", "sfrac"
    expand_fraction(params)
  when "wikt", "wiktionary"
    # Prefer the display text (2nd argument) over the linked entry.
    params[:positional][1] || params[:positional][0] || ""
  when "sic"
    "[sic]"
  when "as of"
    expand_as_of(params)
  else
    # lang-xx templates (e.g., lang-fr, lang-de, lang-ja)
    if template_name.start_with?("lang-")
      expand_lang_xx(template_name, params)
    else
      @preserve_unknown ? "{{#{content}}}" : ""
    end
  end
end
|
|
271
|
+
|
|
272
|
+
# Splits template +content+ on "|" separators that sit outside any nested
# "{...}" or "[...]" group, so pipes inside links or nested templates stay
# with their parameter.
#
# Returns an array of raw (unstripped) parts; a trailing empty part is
# dropped, so empty content yields [].
def split_template_parts(content)
  parts = []
  current = +""
  depth = 0

  content.each_char do |char|
    case char
    when "{", "["
      depth += 1
    when "}", "]"
      # Never go below zero: a stray closer must not stop every later "|"
      # from being recognised as a separator.
      depth -= 1 if depth.positive?
    when "|"
      if depth.zero?
        parts << current
        current = +""
        next
      end
    end

    current << char
  end

  parts << current unless current.empty?
  parts
end
|
|
295
|
+
|
|
296
|
+
# Sorts raw template parts into named and positional parameters.
#
# A part counts as "key=value" only when the text before the first "="
# consists solely of word characters and whitespace; this keeps values
# that merely contain "=" (HTML attributes, URLs, ...) positional.
#
# Returns a hash with the stripped positional values under :positional and
# each named value (stripped) under its lower-cased, stripped String key.
def parse_template_params(parts)
  parsed = { positional: [] }

  parts.each do |part|
    raw_key, raw_value = (part.include?("=") ? part.split("=", 2) : nil)
    name = raw_key&.strip

    if name&.match?(/\A[\w\s]+\z/)
      parsed[name.downcase] = raw_value&.strip
    else
      parsed[:positional] << part.strip
    end
  end

  parsed
end
|
|
323
|
+
|
|
324
|
+
# Date formatting methods
|
|
325
|
+
|
|
326
|
+
# Formats a year / month / day template (positional arguments 1-3) via
# format_date_parts.
#
# Month and day are optional. A named "df" parameter equal to "yes" or "y"
# forces day-first output; otherwise +style+ is used. Returns "" when no
# positional argument is present.
def format_date(params, style: :mdy)
  args = params[:positional]
  return "" if args.empty?

  year = args[0].to_i
  month = args[1]&.to_i
  day = args[2]&.to_i
  chosen_style = %w[yes y].include?(params["df"]) ? :dmy : style

  format_date_parts(year, month, day, chosen_style)
end
|
|
339
|
+
|
|
340
|
+
# Renders a date from numeric parts, with as much precision as is given.
#
# year  - printed as-is.
# month - Integer or nil; anything outside 1..12 (including nil and 0) is
#         treated as "unknown", so only the year is printed. Previously a
#         month above 12 produced an empty month name.
# day   - Integer or nil; nil or a value below 1 prints "Month Year".
# style - :dmy gives "5 May 2020"; any other value gives "May 5, 2020".
def format_date_parts(year, month, day, style)
  month_name = MONTH_NAMES[month - 1] if month&.between?(1, 12)
  return year.to_s unless month_name
  return "#{month_name} #{year}" unless day && day > 0

  style == :dmy ? "#{day} #{month_name} #{year}" : "#{month_name} #{day}, #{year}"
end
|
|
353
|
+
|
|
354
|
+
# Formats a plain date template (year, optional month, optional day) in
# month-day-year order via format_date_parts. Returns "" when there are no
# positional arguments.
def format_simple_date(params)
  args = params[:positional]
  return "" if args.empty?

  year = args.first.to_i
  month, day = args.values_at(1, 2).map { |raw| raw&.to_i }

  format_date_parts(year, month, day, :mdy)
end
|
|
364
|
+
|
|
365
|
+
# Formats a birth-date style template as "<date> (<age_label> <n>)", with
# the age measured against @reference_date.
#
# Missing month/day are no longer replaced by 1 for display, so a
# year-only template prints "1990" rather than a fabricated
# "January 1, 1990"; calculate_age_from_parts applies its own defaults for
# the age arithmetic. When no age can be computed the date is returned on
# its own instead of ending in "(age )".
#
# A named "df" of "yes"/"y" forces day-first output. Returns "" when there
# are no positional arguments.
def format_date_with_age(params, style: :mdy, age_label: "age")
  pos = params[:positional]
  return "" if pos.empty?

  year = pos[0].to_i
  month = pos[1]&.to_i
  day = pos[2]&.to_i

  use_dmy = params["df"] == "yes" || params["df"] == "y"

  date_str = format_date_parts(year, month, day, use_dmy ? :dmy : style)
  age = calculate_age_from_parts(year, month, day, @reference_date)

  age ? "#{date_str} (#{age_label} #{age})" : date_str
end
|
|
380
|
+
|
|
381
|
+
# Formats a death-date-and-age template whose six positional arguments are
# death year/month/day followed by birth year/month/day.
#
# Output is "<death date> (aged <n>)", or just the death date when no age
# can be computed or an ArgumentError is raised along the way. A named "df"
# of "yes"/"y" selects day-first output. Returns "" with fewer than six
# positional arguments.
def format_death_date_with_age(params)
  args = params[:positional]
  return "" if args.length < 6

  d_year = args[0].to_i
  d_month = normalize_month(args[1].to_i)
  d_day = normalize_day(args[2].to_i)
  b_year = args[3].to_i
  b_month = normalize_month(args[4].to_i)
  b_day = normalize_day(args[5].to_i)

  date_style = %w[yes y].include?(params["df"]) ? :dmy : :mdy

  formatted = format_date_parts(d_year, d_month, d_day, date_style)
  died_on = Time.new(d_year, d_month, d_day)
  age = calculate_age_from_parts(b_year, b_month, b_day, died_on)

  return formatted unless age

  "#{formatted} (aged #{age})"
rescue ArgumentError
  format_date_parts(d_year, d_month, d_day, date_style ? date_style : :mdy)
end
|
|
402
|
+
|
|
403
|
+
# Age calculation methods
|
|
404
|
+
|
|
405
|
+
# Computes the age, as a String, of the date given by positional arguments
# year / month / day, measured against @reference_date.
#
# Month and day default to 1 when absent. Returns "" when there are no
# positional arguments or when calculate_age_from_parts yields nil.
def calculate_age(params)
  args = params[:positional]
  return "" if args.empty?

  birth_year = args[0].to_i
  birth_month = args[1] ? args[1].to_i : 1
  birth_day = args[2] ? args[2].to_i : 1

  calculate_age_from_parts(birth_year, birth_month, birth_day, @reference_date).to_s
end
|
|
415
|
+
|
|
416
|
+
# Returns the number of whole years between the given birth date and
# +reference+ (any object responding to year, month and day).
#
# year must be an Integer in 1..9999, otherwise nil is returned. A month
# outside 1..12 or a day outside 1..31 (or nil) is replaced by 1. The birth
# date is built with Time.new; if that raises ArgumentError the result is
# nil.
def calculate_age_from_parts(year, month, day, reference)
  return nil unless year.is_a?(Integer) && year.between?(1, 9999)

  month = 1 unless month && month.between?(1, 12)
  day = 1 unless day && day.between?(1, 31)

  birth = Time.new(year, month, day)
  years = reference.year - birth.year

  # One year less while this year's birthday is still ahead of the reference.
  birthday_pending = ([reference.month, reference.day] <=> [birth.month, birth.day]).negative?
  birthday_pending ? years - 1 : years
rescue ArgumentError
  nil
end
|
|
436
|
+
|
|
437
|
+
# Computes the age in whole years between two dates given as six positional
# arguments: start year/month/day, then end year/month/day.
#
# Months and days are passed through normalize_month / normalize_day.
# Returns the age as a String, or "" when fewer than six arguments are
# given, no age can be computed, or an ArgumentError is raised.
def calculate_age_between_dates(params)
  args = params[:positional]
  return "" if args.length < 6

  from_year, from_month, from_day, to_year, to_month, to_day = args.first(6).map(&:to_i)

  from_month = normalize_month(from_month)
  from_day = normalize_day(from_day)
  upto = Time.new(to_year, normalize_month(to_month), normalize_day(to_day))

  # nil.to_s is "", which is exactly the "no age" result.
  calculate_age_from_parts(from_year, from_month, from_day, upto).to_s
rescue ArgumentError
  ""
end
|
|
454
|
+
|
|
455
|
+
# Returns, as a String, the number of whole days between two dates given as
# six positional arguments: start year/month/day, then end year/month/day.
#
# Both dates are built in UTC. With local time, a span that crosses a
# daylight-saving change is an hour short of a whole number of days, and
# truncating with to_i then loses a day.
#
# Returns "" when fewer than six arguments are given or a date cannot be
# constructed (ArgumentError).
def calculate_days_between(params)
  pos = params[:positional]
  return "" if pos.length < 6

  start_date = Time.utc(pos[0].to_i, normalize_month(pos[1].to_i), normalize_day(pos[2].to_i))
  end_date = Time.utc(pos[3].to_i, normalize_month(pos[4].to_i), normalize_day(pos[5].to_i))

  ((end_date - start_date) / 86_400).to_i.to_s
rescue ArgumentError
  ""
end
|
|
466
|
+
|
|
467
|
+
# Returns +month+ when it lies in 1..12, otherwise 1.
def normalize_month(month)
  month.between?(1, 12) ? month : 1
end
|
|
471
|
+
|
|
472
|
+
# Returns +day+ when it lies in 1..31, otherwise 1.
def normalize_day(day)
  day.between?(1, 31) ? day : 1
end
|
|
476
|
+
|
|
477
|
+
# Formats the span between two dates as "<y> years, <d> days", where <d>
# counts the days since the most recent birthday anniversary.
#
# Takes six positional arguments: birth year/month/day, then end
# year/month/day. Dates are built in UTC so a daylight-saving change inside
# the span cannot make the day count come up one short after truncation.
# The previously unused local birth Time object has been removed.
#
# Returns "" when fewer than six arguments are given, no age can be
# computed, or a date cannot be constructed (ArgumentError).
def calculate_age_years_and_days(params)
  pos = params[:positional]
  return "" if pos.length < 6

  birth_year = pos[0].to_i
  birth_month = normalize_month(pos[1].to_i)
  birth_day = normalize_day(pos[2].to_i)
  end_year = pos[3].to_i
  end_date = Time.utc(end_year, normalize_month(pos[4].to_i), normalize_day(pos[5].to_i))

  years = calculate_age_from_parts(birth_year, birth_month, birth_day, end_date)
  return "" unless years

  # The latest anniversary is in the end year, or the year before if this
  # year's anniversary has not been reached yet.
  last_birthday = Time.utc(end_year, birth_month, birth_day)
  last_birthday = Time.utc(end_year - 1, birth_month, birth_day) if last_birthday > end_date
  days = ((end_date - last_birthday) / 86_400).to_i

  "#{years} years, #{days} days"
rescue ArgumentError
  ""
end
|
|
503
|
+
|
|
504
|
+
# Describes how long ago the date given by positional year / month / day
# was, relative to @reference_date: "5 days ago", "3 months ago",
# "2 years ago".
#
# Spans under 30 days are given in days, under 365 days in 30-day months,
# otherwise in 365-day years (rounded). The unit is singular for a count of
# one ("1 day ago"), and dates after the reference are phrased as
# "in 5 days" rather than as a negative number of days "ago".
#
# Returns "" when there are no positional arguments or the date cannot be
# constructed (ArgumentError).
def format_time_ago(params)
  pos = params[:positional]
  return "" if pos.empty?

  year = pos[0].to_i
  month = normalize_month(pos[1]&.to_i || 1)
  day = normalize_day(pos[2]&.to_i || 1)

  target = Time.new(year, month, day)
  diff_days = ((@reference_date - target) / 86_400).to_i
  span = diff_days.abs

  count, unit =
    if span < 30
      [span, "day"]
    elsif span < 365
      [(span / 30.0).round, "month"]
    else
      [(span / 365.0).round, "year"]
    end

  phrase = "#{count} #{unit}#{'s' unless count == 1}"
  diff_days.negative? ? "in #{phrase}" : "#{phrase} ago"
rescue ArgumentError
  ""
end
|
|
527
|
+
|
|
528
|
+
# Convert template
|
|
529
|
+
|
|
530
|
+
# Expands a convert template: positional arguments are value, source unit
# and (optionally) target unit.
#
# Output is "<value> <from> (<converted> <to>)" when the unit pair is in
# CONVERSIONS, otherwise just "<value> <from>". Unit labels come from
# UNIT_DISPLAY when available. Symbol entries in CONVERSIONS name a
# conversion method on this object (temperature); numeric entries are
# multiplication factors.
#
# Thousands separators ("1,500") and the Unicode minus sign are normalised
# before parsing; to_f would otherwise read "1,500" as 1.0 and a Unicode
# minus as 0. Returns "" when there are no positional arguments.
def expand_convert(params)
  pos = params[:positional]
  return "" if pos.empty?

  value = pos[0].to_s.tr("−", "-").delete(",").to_f
  from_unit = pos[1]&.strip || ""
  to_unit = pos[2]&.strip || ""

  return "#{format_number(value)} #{from_unit}" if to_unit.empty?

  from_key = normalize_unit(from_unit)
  to_key = normalize_unit(to_unit)
  source_text = "#{format_number(value)} #{UNIT_DISPLAY[from_key] || from_unit}"

  conversion = CONVERSIONS[[from_key, to_key]]
  # Unknown unit pair: show the source quantity only.
  return source_text if conversion.nil?

  converted = conversion.is_a?(Symbol) ? send(conversion, value) : value * conversion
  "#{source_text} (#{format_number(converted)} #{UNIT_DISPLAY[to_key] || to_unit})"
end
|
|
561
|
+
|
|
562
|
+
# Returns +unit+ with every whitespace character (space, tab, CR, LF, form
# feed, vertical tab) removed, so "km / h" and "km/h" look up the same key.
def normalize_unit(unit)
  unit.delete(" \t\r\n\f\v")
end
|
|
566
|
+
|
|
567
|
+
# Renders +value+ rounded to one decimal place, dropping the decimal part
# when the rounded value is a whole number (5.0 -> "5", 5.26 -> "5.3").
def format_number(value)
  one_decimal = value.round(1)
  whole = one_decimal.to_i

  one_decimal == whole ? whole.to_s : format("%.1f", one_decimal)
end
|
|
576
|
+
|
|
577
|
+
# Converts degrees Celsius to degrees Fahrenheit, rounded to the nearest
# whole degree.
def celsius_to_fahrenheit(c)
  fahrenheit = c * 9.0 / 5.0 + 32
  fahrenheit.round
end
|
|
580
|
+
|
|
581
|
+
# Converts degrees Fahrenheit to degrees Celsius, rounded to the nearest
# whole degree.
def fahrenheit_to_celsius(f)
  celsius = (f - 32) * 5.0 / 9.0
  celsius.round
end
|
|
584
|
+
|
|
585
|
+
# Common template expansions
|
|
586
|
+
|
|
587
|
+
# Expands a circa template: one date gives "c. 1500", two or more give the
# range "c. 1500 – c. 1550" (only the first two are used). Returns "" when
# no positional argument is given.
def expand_circa(params)
  dates = params[:positional]

  case dates.length
  when 0 then ""
  when 1 then "c. #{dates[0]}"
  else "c. #{dates[0]} – c. #{dates[1]}"
  end
end
|
|
597
|
+
|
|
598
|
+
# Expands a floruit template: one date gives "fl. 1200", two or more give
# "fl. 1200–1250" (only the first two are used). Returns "" when no
# positional argument is given.
def expand_floruit(params)
  dates = params[:positional]

  case dates.length
  when 0 then ""
  when 1 then "fl. #{dates[0]}"
  else "fl. #{dates[0]}–#{dates[1]}"
  end
end
|
|
608
|
+
|
|
609
|
+
# Expands a reign template to "r. <start>–<end>". Both positional arguments
# are required; with fewer than two the result is "".
def expand_reign(params)
  dates = params[:positional]
  dates.length >= 2 ? "r. #{dates[0]}–#{dates[1]}" : ""
end
|
|
615
|
+
|
|
616
|
+
# Expands a marriage template: positional arguments are spouse name, start
# year and end year; the named "reason" parameter selects how the end is
# labelled ("wid." for widowed/wid, "d." for died/d, otherwise "div.").
#
# Output is "Name (m. 1990; div. 2000)", "Name (m. 1990)" or just "Name".
# An empty start year is treated the same as a missing one, so the result
# never contains a dangling "m. " with nothing after it. Returns "" when
# there are no positional arguments.
def expand_marriage(params)
  pos = params[:positional]
  return "" if pos.empty?

  name = pos[0].to_s
  start_year = pos[1].to_s
  end_year = pos[2].to_s

  if end_year.empty?
    return start_year.empty? ? name : "#{name} (m. #{start_year})"
  end

  end_abbr = case params["reason"]&.downcase
             when "widowed", "wid" then "wid."
             when "died", "d" then "d."
             else "div."
             end
  ending = "#{end_abbr} #{end_year}"

  start_year.empty? ? "#{name} (#{ending})" : "#{name} (m. #{start_year}; #{ending})"
end
|
|
639
|
+
|
|
640
|
+
# Joins the first two positional arguments as "<start>–<end>". Returns ""
# when fewer than two are given.
def expand_year_range(params)
  years = params[:positional]
  years.length >= 2 ? "#{years[0]}–#{years[1]}" : ""
end
|
|
646
|
+
|
|
647
|
+
# Coordinate template expansion.
#
# Supported positional layouts (after dropping metadata arguments):
#   lat|lon                  -> "51.5°, -0.1°"
#   lat|N/S|lon|E/W          -> "51.5° N, 0.1° W"
#   d|m|N/S|d|m|E/W          -> "51°30′ N, 0°7′ W"
#   d|m|s|N/S|d|m|s|E/W      -> "51°30′26″ N, 0°7′39″ W"
# Any other shape is joined with ", ".
#
# Metadata arguments (anything containing ":" such as "type:city", or
# mentioning display/format/type/name) are removed BEFORE the layout is
# chosen, so a trailing "type:..." no longer pushes a valid coordinate into
# the fallback. The four-argument layout requires the direction fields to
# be exactly N/S and E/W; otherwise the values are simply joined (the old
# code indexed two arguments that cannot exist there and relied on an
# inline rescue). Returns "" when there are no positional arguments.
def expand_coord(params)
  pos = params[:positional]
  return "" if pos.empty?

  coords = pos.reject do |arg|
    arg.nil? || arg.include?(":") || arg.match?(/display|format|type|name/i)
  end

  case coords.length
  when 2
    "#{coords[0]}°, #{coords[1]}°"
  when 4
    lat, ns, lon, ew = coords
    if ns.match?(/\A[NS]\z/i) && ew.match?(/\A[EW]\z/i)
      "#{lat}° #{ns.upcase}, #{lon}° #{ew.upcase}"
    else
      coords.join(", ")
    end
  when 6
    "#{coords[0]}°#{coords[1]}′ #{coords[2].upcase}, #{coords[3]}°#{coords[4]}′ #{coords[5].upcase}"
  when 8
    "#{coords[0]}°#{coords[1]}′#{coords[2]}″ #{coords[3].upcase}, " \
      "#{coords[4]}°#{coords[5]}′#{coords[6]}″ #{coords[7].upcase}"
  else
    coords.join(", ")
  end
end
|
|
680
|
+
|
|
681
|
+
# Language code to name mapping
|
|
682
|
+
LANGUAGE_NAMES = {
|
|
683
|
+
"aa" => "Afar", "ab" => "Abkhazian", "af" => "Afrikaans", "am" => "Amharic",
|
|
684
|
+
"ar" => "Arabic", "as" => "Assamese", "az" => "Azerbaijani", "ba" => "Bashkir",
|
|
685
|
+
"be" => "Belarusian", "bg" => "Bulgarian", "bn" => "Bengali", "bo" => "Tibetan",
|
|
686
|
+
"br" => "Breton", "ca" => "Catalan", "cs" => "Czech", "cy" => "Welsh",
|
|
687
|
+
"da" => "Danish", "de" => "German", "dv" => "Divehi", "dz" => "Dzongkha",
|
|
688
|
+
"el" => "Greek", "en" => "English", "eo" => "Esperanto", "es" => "Spanish",
|
|
689
|
+
"et" => "Estonian", "eu" => "Basque", "fa" => "Persian", "fi" => "Finnish",
|
|
690
|
+
"fj" => "Fijian", "fo" => "Faroese", "fr" => "French", "fy" => "Frisian",
|
|
691
|
+
"ga" => "Irish", "gd" => "Scottish Gaelic", "gl" => "Galician", "gn" => "Guarani",
|
|
692
|
+
"gu" => "Gujarati", "ha" => "Hausa", "he" => "Hebrew", "hi" => "Hindi",
|
|
693
|
+
"hr" => "Croatian", "hu" => "Hungarian", "hy" => "Armenian", "id" => "Indonesian",
|
|
694
|
+
"is" => "Icelandic", "it" => "Italian", "ja" => "Japanese", "jv" => "Javanese",
|
|
695
|
+
"ka" => "Georgian", "kk" => "Kazakh", "km" => "Khmer", "kn" => "Kannada",
|
|
696
|
+
"ko" => "Korean", "ku" => "Kurdish", "ky" => "Kyrgyz", "la" => "Latin",
|
|
697
|
+
"lb" => "Luxembourgish", "lo" => "Lao", "lt" => "Lithuanian", "lv" => "Latvian",
|
|
698
|
+
"mg" => "Malagasy", "mi" => "Maori", "mk" => "Macedonian", "ml" => "Malayalam",
|
|
699
|
+
"mn" => "Mongolian", "mr" => "Marathi", "ms" => "Malay", "mt" => "Maltese",
|
|
700
|
+
"my" => "Burmese", "ne" => "Nepali", "nl" => "Dutch", "no" => "Norwegian",
|
|
701
|
+
"oc" => "Occitan", "or" => "Oriya", "pa" => "Punjabi", "pl" => "Polish",
|
|
702
|
+
"ps" => "Pashto", "pt" => "Portuguese", "qu" => "Quechua", "rm" => "Romansh",
|
|
703
|
+
"ro" => "Romanian", "ru" => "Russian", "rw" => "Kinyarwanda", "sa" => "Sanskrit",
|
|
704
|
+
"sc" => "Sardinian", "sd" => "Sindhi", "se" => "Northern Sami", "si" => "Sinhala",
|
|
705
|
+
"sk" => "Slovak", "sl" => "Slovenian", "sm" => "Samoan", "sn" => "Shona",
|
|
706
|
+
"so" => "Somali", "sq" => "Albanian", "sr" => "Serbian", "ss" => "Swati",
|
|
707
|
+
"st" => "Southern Sotho", "su" => "Sundanese", "sv" => "Swedish", "sw" => "Swahili",
|
|
708
|
+
"ta" => "Tamil", "te" => "Telugu", "tg" => "Tajik", "th" => "Thai",
|
|
709
|
+
"ti" => "Tigrinya", "tk" => "Turkmen", "tl" => "Tagalog", "tn" => "Tswana",
|
|
710
|
+
"to" => "Tongan", "tr" => "Turkish", "ts" => "Tsonga", "tt" => "Tatar",
|
|
711
|
+
"tw" => "Twi", "ug" => "Uyghur", "uk" => "Ukrainian", "ur" => "Urdu",
|
|
712
|
+
"uz" => "Uzbek", "ve" => "Venda", "vi" => "Vietnamese", "vo" => "Volapük",
|
|
713
|
+
"wa" => "Walloon", "wo" => "Wolof", "xh" => "Xhosa", "yi" => "Yiddish",
|
|
714
|
+
"yo" => "Yoruba", "za" => "Zhuang", "zh" => "Chinese", "zu" => "Zulu",
|
|
715
|
+
# Extended codes
|
|
716
|
+
"grc" => "Ancient Greek", "ang" => "Old English", "fro" => "Old French",
|
|
717
|
+
"gmh" => "Middle High German", "non" => "Old Norse", "peo" => "Old Persian",
|
|
718
|
+
"sga" => "Old Irish", "syc" => "Classical Syriac"
|
|
719
|
+
}.freeze
|
|
720
|
+
|
|
721
|
+
# Expands a lang template (language code, text): returns the text, followed
# by " (lit. '<lit>')" when a named "lit" parameter is present. Returns ""
# when fewer than two positional arguments are given.
def expand_lang(params)
  args = params[:positional]
  return "" if args.length < 2

  body = args[1] || ""
  gloss = params["lit"]

  gloss ? "#{body} (lit. '#{gloss}')" : body
end
|
|
734
|
+
|
|
735
|
+
# Expands a "lang-xx" template to "<Language>: <text>", optionally followed
# by " (lit. '<lit>')" when a named "lit" parameter is present.
#
# The language label is looked up in LANGUAGE_NAMES by the code that
# follows "lang-"; unknown codes are shown upper-cased. Returns "" when no
# positional argument is given.
def expand_lang_xx(template_name, params)
  args = params[:positional]
  return "" if args.empty?

  code = template_name.sub("lang-", "")
  label = LANGUAGE_NAMES.fetch(code) { code.upcase }
  rendered = "#{label}: #{args[0]}"

  gloss = params["lit"]
  gloss ? "#{rendered} (lit. '#{gloss}')" : rendered
end
|
|
750
|
+
|
|
751
|
+
# Expands a transliteration template to its text.
#
# The template is written either as lang|text or as lang|scheme|text, so
# the text is the third positional argument when there is one and the
# second otherwise (previously the scheme name was returned for the
# three-argument form). With a single argument that argument is returned;
# with none, "".
def expand_transl(params)
  pos = params[:positional]
  return "" if pos.empty?

  pos[2] || pos[1] || pos[0] || ""
end
|
|
758
|
+
|
|
759
|
+
# Expands a nihongo template (English, kanji/kana, romaji) to
# "English (kanji, romaji)".
#
# Empty or missing kanji/romaji are skipped individually, so romaji is kept
# even when the kanji argument is blank (it used to be dropped), and an
# empty English part no longer leaves a leading space before the
# parenthesis. Returns "" when there are no positional arguments.
def expand_nihongo(params)
  pos = params[:positional]
  return "" if pos.empty?

  english = pos[0] || ""
  extras = [pos[1], pos[2]].reject { |part| part.nil? || part.empty? }
  return english if extras.empty?

  bracketed = "(#{extras.join(', ')})"
  english.empty? ? bracketed : "#{english} #{bracketed}"
end
|
|
778
|
+
|
|
779
|
+
# Blockquote template: returns the quoted text, followed by " — <source>"
# when an attribution is available.
#
# The text is taken from the named "text" or "1" parameter, else the first
# positional argument; the attribution from "source" or "2", else the
# second positional argument. The text is stripped of surrounding
# whitespace.
def expand_blockquote(params)
  args = params[:positional]
  quote = (params["text"] || params["1"] || args[0] || "").strip
  attribution = params["source"] || params["2"] || args[1]

  return quote if attribution.nil? || attribution.empty?

  "#{quote} — #{attribution}"
end
|
|
789
|
+
|
|
790
|
+
# Fraction template: renders positional arguments as a plain-text fraction.
#   one argument    -> "1/<den>"
#   two arguments   -> "<num>/<den>"
#   three arguments -> "<whole>+<num>/<den>"
#   more            -> all arguments joined with "/"
# Returns "" when there are no positional arguments.
def expand_fraction(params)
  terms = params[:positional]
  return "" if terms.empty?
  return "1/#{terms[0]}" if terms.length == 1
  return "#{terms[0]}+#{terms[1]}/#{terms[2]}" if terms.length == 3

  # Two arguments and the 4+ fallback are both a plain "/" join.
  terms.join("/")
end
|
|
809
|
+
|
|
810
|
+
# As of template: formats a date reference such as "As of May 1, 2020",
# "As of May 2020" or "As of 2020" from positional year, month and day.
#
# The month may be a number (1-12) or an English month name or abbreviation
# of at least three letters. A month that cannot be resolved is left out:
# the old lookup indexed MONTH_NAMES with month.to_i - 1, which printed
# "December" for a month name or 0 and an empty month for values above 12.
# Returns "" when there are no positional arguments.
def expand_as_of(params)
  pos = params[:positional]
  return "" if pos.empty?

  year = pos[0]
  month_arg = pos[1].to_s.strip
  day = pos[2].to_s.strip

  month_name =
    if month_arg.match?(/\A\d+\z/)
      MONTH_NAMES[month_arg.to_i - 1] if month_arg.to_i.between?(1, 12)
    elsif month_arg.length >= 3
      MONTH_NAMES.find { |name| name.downcase.start_with?(month_arg.downcase) }
    end

  if month_name.nil?
    "As of #{year}"
  elsif day.empty?
    "As of #{month_name} #{year}"
  else
    "As of #{month_name} #{day}, #{year}"
  end
end
|
|
829
|
+
end
|
|
830
|
+
end
|