wp2txt 1.1.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.dockerignore +12 -0
  3. data/.github/workflows/ci.yml +13 -13
  4. data/.gitignore +14 -0
  5. data/CHANGELOG.md +284 -0
  6. data/DEVELOPMENT.md +415 -0
  7. data/DEVELOPMENT_ja.md +415 -0
  8. data/Dockerfile +19 -10
  9. data/Gemfile +2 -8
  10. data/README.md +259 -123
  11. data/README_ja.md +375 -0
  12. data/Rakefile +4 -0
  13. data/bin/wp2txt +863 -161
  14. data/lib/wp2txt/article.rb +98 -13
  15. data/lib/wp2txt/bz2_validator.rb +239 -0
  16. data/lib/wp2txt/category_cache.rb +313 -0
  17. data/lib/wp2txt/cli.rb +319 -0
  18. data/lib/wp2txt/cli_ui.rb +428 -0
  19. data/lib/wp2txt/config.rb +158 -0
  20. data/lib/wp2txt/constants.rb +134 -0
  21. data/lib/wp2txt/data/html_entities.json +2135 -0
  22. data/lib/wp2txt/data/language_metadata.json +4769 -0
  23. data/lib/wp2txt/data/language_tiers.json +59 -0
  24. data/lib/wp2txt/data/mediawiki_aliases.json +12366 -0
  25. data/lib/wp2txt/data/template_aliases.json +193 -0
  26. data/lib/wp2txt/data/wikipedia_entities.json +12 -0
  27. data/lib/wp2txt/extractor.rb +545 -0
  28. data/lib/wp2txt/file_utils.rb +91 -0
  29. data/lib/wp2txt/formatter.rb +352 -0
  30. data/lib/wp2txt/global_data_cache.rb +353 -0
  31. data/lib/wp2txt/index_cache.rb +258 -0
  32. data/lib/wp2txt/magic_words.rb +353 -0
  33. data/lib/wp2txt/memory_monitor.rb +236 -0
  34. data/lib/wp2txt/multistream.rb +1383 -0
  35. data/lib/wp2txt/output_writer.rb +182 -0
  36. data/lib/wp2txt/parser_functions.rb +606 -0
  37. data/lib/wp2txt/ractor_worker.rb +215 -0
  38. data/lib/wp2txt/regex.rb +396 -12
  39. data/lib/wp2txt/section_extractor.rb +354 -0
  40. data/lib/wp2txt/stream_processor.rb +271 -0
  41. data/lib/wp2txt/template_expander.rb +830 -0
  42. data/lib/wp2txt/text_processing.rb +337 -0
  43. data/lib/wp2txt/utils.rb +629 -270
  44. data/lib/wp2txt/version.rb +1 -1
  45. data/lib/wp2txt.rb +53 -26
  46. data/scripts/benchmark_regex.rb +161 -0
  47. data/scripts/fetch_html_entities.rb +94 -0
  48. data/scripts/fetch_language_metadata.rb +180 -0
  49. data/scripts/fetch_mediawiki_data.rb +334 -0
  50. data/scripts/fetch_template_data.rb +186 -0
  51. data/scripts/profile_memory.rb +139 -0
  52. data/spec/article_spec.rb +402 -0
  53. data/spec/auto_download_spec.rb +314 -0
  54. data/spec/bz2_validator_spec.rb +193 -0
  55. data/spec/category_cache_spec.rb +226 -0
  56. data/spec/category_fetcher_spec.rb +504 -0
  57. data/spec/cleanup_spec.rb +197 -0
  58. data/spec/cli_options_spec.rb +678 -0
  59. data/spec/cli_spec.rb +876 -0
  60. data/spec/config_spec.rb +194 -0
  61. data/spec/constants_spec.rb +138 -0
  62. data/spec/file_utils_spec.rb +170 -0
  63. data/spec/fixtures/samples.rb +181 -0
  64. data/spec/formatter_sections_spec.rb +382 -0
  65. data/spec/global_data_cache_spec.rb +186 -0
  66. data/spec/index_cache_spec.rb +210 -0
  67. data/spec/integration_spec.rb +543 -0
  68. data/spec/magic_words_spec.rb +261 -0
  69. data/spec/markers_spec.rb +476 -0
  70. data/spec/memory_monitor_spec.rb +192 -0
  71. data/spec/multistream_spec.rb +690 -0
  72. data/spec/output_writer_spec.rb +400 -0
  73. data/spec/parser_functions_spec.rb +455 -0
  74. data/spec/ractor_worker_spec.rb +197 -0
  75. data/spec/regex_spec.rb +281 -0
  76. data/spec/section_extractor_spec.rb +397 -0
  77. data/spec/spec_helper.rb +63 -0
  78. data/spec/stream_processor_spec.rb +579 -0
  79. data/spec/template_data_spec.rb +246 -0
  80. data/spec/template_expander_spec.rb +472 -0
  81. data/spec/template_processing_spec.rb +217 -0
  82. data/spec/text_processing_spec.rb +312 -0
  83. data/spec/utils_spec.rb +195 -16
  84. data/spec/wp2txt_spec.rb +510 -0
  85. data/wp2txt.gemspec +5 -3
  86. metadata +146 -18
  87. data/.rubocop.yml +0 -80
  88. data/data/output_samples/testdata_en.txt +0 -23002
  89. data/data/output_samples/testdata_en_category.txt +0 -132
  90. data/data/output_samples/testdata_en_summary.txt +0 -1376
  91. data/data/output_samples/testdata_ja.txt +0 -22774
  92. data/data/output_samples/testdata_ja_category.txt +0 -206
  93. data/data/output_samples/testdata_ja_summary.txt +0 -1560
  94. data/data/testdata_en.bz2 +0 -0
  95. data/data/testdata_ja.bz2 +0 -0
  96. data/image/screenshot.png +0 -0
@@ -0,0 +1,830 @@
1
+ # frozen_string_literal: true
2
+
3
module Wp2txt
  # Expands common MediaWiki templates to their plain-text representation.
  #
  # Handles date/age templates, {{convert}}, coordinates, language wrappers
  # and a handful of simple formatting templates. Templates that are not
  # recognised are removed, or kept verbatim when +preserve_unknown+ is set.
  #
  # @example
  #   expander = Wp2txt::TemplateExpander.new
  #   expander.expand("{{convert|10|km|mi}}") # => "10 km (6.2 mi)"
  class TemplateExpander
    MONTH_NAMES = %w[
      January February March April May June
      July August September October November December
    ].freeze

    # Unit conversion factors, keyed by [from_unit, to_unit].
    # A Symbol value names a private method that performs the conversion.
    CONVERSIONS = {
      # Length
      ["km", "mi"] => 0.621371,
      ["mi", "km"] => 1.60934,
      ["m", "ft"] => 3.28084,
      ["ft", "m"] => 0.3048,
      ["cm", "in"] => 0.393701,
      ["in", "cm"] => 2.54,
      ["mm", "in"] => 0.0393701,
      ["in", "mm"] => 25.4,
      ["yd", "m"] => 0.9144,
      ["m", "yd"] => 1.09361,
      # Weight
      ["kg", "lb"] => 2.20462,
      ["lb", "kg"] => 0.453592,
      ["g", "oz"] => 0.035274,
      ["oz", "g"] => 28.3495,
      ["t", "lb"] => 2204.62,
      ["lb", "t"] => 0.000453592,
      # Temperature (special handling)
      ["C", "F"] => :celsius_to_fahrenheit,
      ["°C", "°F"] => :celsius_to_fahrenheit,
      ["F", "C"] => :fahrenheit_to_celsius,
      ["°F", "°C"] => :fahrenheit_to_celsius,
      # Area
      ["km2", "sqmi"] => 0.386102,
      ["sqmi", "km2"] => 2.58999,
      ["ha", "acre"] => 2.47105,
      ["acre", "ha"] => 0.404686,
      ["m2", "sqft"] => 10.7639,
      ["sqft", "m2"] => 0.092903,
      # Speed
      ["km/h", "mph"] => 0.621371,
      ["mph", "km/h"] => 1.60934,
      ["m/s", "km/h"] => 3.6,
      ["km/h", "m/s"] => 0.277778,
      # Volume
      ["l", "gal"] => 0.264172,
      ["gal", "l"] => 3.78541,
      ["ml", "floz"] => 0.033814,
      ["floz", "ml"] => 29.5735
    }.freeze

    # Unit display names
    UNIT_DISPLAY = {
      "km" => "km",
      "mi" => "mi",
      "m" => "m",
      "ft" => "ft",
      "cm" => "cm",
      "in" => "in",
      "mm" => "mm",
      "yd" => "yd",
      "kg" => "kg",
      "lb" => "lb",
      "g" => "g",
      "oz" => "oz",
      "t" => "t",
      "C" => "°C",
      "°C" => "°C",
      "F" => "°F",
      "°F" => "°F",
      "km2" => "km²",
      "sqmi" => "sq mi",
      "ha" => "ha",
      "acre" => "acres",
      "m2" => "m²",
      "sqft" => "sq ft",
      "km/h" => "km/h",
      "mph" => "mph",
      "m/s" => "m/s",
      "l" => "L",
      "gal" => "gal",
      "ml" => "mL",
      "floz" => "fl oz"
    }.freeze

    # Language code to name mapping (used by the lang-xx templates)
    LANGUAGE_NAMES = {
      "aa" => "Afar", "ab" => "Abkhazian", "af" => "Afrikaans", "am" => "Amharic",
      "ar" => "Arabic", "as" => "Assamese", "az" => "Azerbaijani", "ba" => "Bashkir",
      "be" => "Belarusian", "bg" => "Bulgarian", "bn" => "Bengali", "bo" => "Tibetan",
      "br" => "Breton", "ca" => "Catalan", "cs" => "Czech", "cy" => "Welsh",
      "da" => "Danish", "de" => "German", "dv" => "Divehi", "dz" => "Dzongkha",
      "el" => "Greek", "en" => "English", "eo" => "Esperanto", "es" => "Spanish",
      "et" => "Estonian", "eu" => "Basque", "fa" => "Persian", "fi" => "Finnish",
      "fj" => "Fijian", "fo" => "Faroese", "fr" => "French", "fy" => "Frisian",
      "ga" => "Irish", "gd" => "Scottish Gaelic", "gl" => "Galician", "gn" => "Guarani",
      "gu" => "Gujarati", "ha" => "Hausa", "he" => "Hebrew", "hi" => "Hindi",
      "hr" => "Croatian", "hu" => "Hungarian", "hy" => "Armenian", "id" => "Indonesian",
      "is" => "Icelandic", "it" => "Italian", "ja" => "Japanese", "jv" => "Javanese",
      "ka" => "Georgian", "kk" => "Kazakh", "km" => "Khmer", "kn" => "Kannada",
      "ko" => "Korean", "ku" => "Kurdish", "ky" => "Kyrgyz", "la" => "Latin",
      "lb" => "Luxembourgish", "lo" => "Lao", "lt" => "Lithuanian", "lv" => "Latvian",
      "mg" => "Malagasy", "mi" => "Maori", "mk" => "Macedonian", "ml" => "Malayalam",
      "mn" => "Mongolian", "mr" => "Marathi", "ms" => "Malay", "mt" => "Maltese",
      "my" => "Burmese", "ne" => "Nepali", "nl" => "Dutch", "no" => "Norwegian",
      "oc" => "Occitan", "or" => "Oriya", "pa" => "Punjabi", "pl" => "Polish",
      "ps" => "Pashto", "pt" => "Portuguese", "qu" => "Quechua", "rm" => "Romansh",
      "ro" => "Romanian", "ru" => "Russian", "rw" => "Kinyarwanda", "sa" => "Sanskrit",
      "sc" => "Sardinian", "sd" => "Sindhi", "se" => "Northern Sami", "si" => "Sinhala",
      "sk" => "Slovak", "sl" => "Slovenian", "sm" => "Samoan", "sn" => "Shona",
      "so" => "Somali", "sq" => "Albanian", "sr" => "Serbian", "ss" => "Swati",
      "st" => "Southern Sotho", "su" => "Sundanese", "sv" => "Swedish", "sw" => "Swahili",
      "ta" => "Tamil", "te" => "Telugu", "tg" => "Tajik", "th" => "Thai",
      "ti" => "Tigrinya", "tk" => "Turkmen", "tl" => "Tagalog", "tn" => "Tswana",
      "to" => "Tongan", "tr" => "Turkish", "ts" => "Tsonga", "tt" => "Tatar",
      "tw" => "Twi", "ug" => "Uyghur", "uk" => "Ukrainian", "ur" => "Urdu",
      "uz" => "Uzbek", "ve" => "Venda", "vi" => "Vietnamese", "vo" => "Volapük",
      "wa" => "Walloon", "wo" => "Wolof", "xh" => "Xhosa", "yi" => "Yiddish",
      "yo" => "Yoruba", "za" => "Zhuang", "zh" => "Chinese", "zu" => "Zulu",
      # Extended codes
      "grc" => "Ancient Greek", "ang" => "Old English", "fro" => "Old French",
      "gmh" => "Middle High German", "non" => "Old Norse", "peo" => "Old Persian",
      "sga" => "Old Irish", "syc" => "Classical Syriac"
    }.freeze

    # Upper bound on whole-text passes in #expand.
    MAX_EXPANSION_PASSES = 10
    # Highest explicit parameter number (e.g. "3=...") mirrored into the
    # positional list; bounds the array a hostile "999999=x" could allocate.
    MAX_NUMBERED_PARAM = 64
    SECONDS_PER_DAY = 86_400
    # Positional {{coord}} arguments matching this are options, not coordinates.
    COORD_OPTION_PATTERN = /display|format|type|name/i

    # @param reference_date [Time, Date, nil] "today" for age calculations;
    #   defaults to the current time
    # @param preserve_unknown [Boolean] keep unrecognised templates in the
    #   output instead of deleting them
    def initialize(reference_date: nil, preserve_unknown: false)
      @reference_date = reference_date || Time.now
      @preserve_unknown = preserve_unknown
    end

    # Expands every supported template found in +text+.
    #
    # @param text [String, nil] wikitext to process
    # @return [String, nil] +text+ itself when it is nil, empty or contains no
    #   "{{"; otherwise a new string with templates expanded
    def expand(text)
      return text if text.nil? || text.empty?

      # Early exit: no templates to expand
      return text unless text.include?("{{")

      result = text.dup

      # Nested templates are expanded while their parent is processed; further
      # passes only matter if an expansion itself yields template markup.
      MAX_EXPANSION_PASSES.times do
        break unless result.include?("{{")

        expanded = expand_templates_single_pass(result)
        break if expanded == result

        result = expanded
      end

      result
    end

    private

    # Walks +text+ once, replacing each top-level {{...}} with its expansion.
    # An opening "{{" without a matching "}}" is copied through unchanged.
    def expand_templates_single_pass(text)
      output = +""
      cursor = 0

      while cursor < text.length
        open_idx = text.index("{{", cursor)

        if open_idx.nil?
          output << text[cursor..]
          break
        end

        # Text before the template
        output << text[cursor...open_idx]

        close_idx = find_template_end(text, open_idx + 2)

        if close_idx.nil?
          # No matching }}, treat the remainder as plain text
          output << text[open_idx..]
          break
        end

        output << expand_single_template(text[(open_idx + 2)...close_idx]).to_s
        cursor = close_idx + 2
      end

      output
    end

    # Expands templates nested inside one template argument.
    def expand_nested(text)
      text.include?("{{") ? expand_templates_single_pass(text) : text
    end

    # Returns the index of the "}}" closing the template whose body starts at
    # +start_pos+, honouring nested "{{ }}" pairs, or nil when unbalanced.
    def find_template_end(text, start_pos)
      depth = 1
      pos = start_pos
      last = text.length - 1

      while pos < last
        case text[pos, 2]
        when "{{"
          depth += 1
          pos += 2
        when "}}"
          depth -= 1
          return pos if depth.zero?

          pos += 2
        else
          pos += 1
        end
      end

      nil
    end

    # Expands the body of one template (the text between "{{" and "}}").
    #
    # Arguments are split first and nested templates are expanded per
    # argument, so a "|" produced by an inner expansion cannot be mistaken
    # for an argument separator of the outer template.
    def expand_single_template(content)
      raw_parts = split_template_parts(content)
      return "" if raw_parts.empty?

      # split_template_parts drops one trailing empty argument; remember it so
      # a preserved unknown template can be rebuilt faithfully.
      trailing_pipe = raw_parts.sum(&:length) + raw_parts.length - 1 < content.length

      parts = raw_parts.map { |part| expand_nested(part) }
      template_name = normalize_template_name(parts[0])
      params = parse_template_params(parts[1..])

      case template_name
      # Date templates
      when "birth date", "birthdate", "death date", "deathdate",
           "start date", "startdate", "end date", "enddate"
        format_date(params, style: :mdy)
      when "birth date and age", "birthdate and age"
        format_date_with_age(params, style: :mdy, age_label: "age")
      when "death date and age", "deathdate and age"
        format_death_date_with_age(params)
      when "date"
        format_simple_date(params)

      # Age templates
      when "age"
        calculate_age(params)
      when "birth year and age"
        expand_birth_year_and_age(params)
      when "death year and age"
        expand_death_year_and_age(params)
      when "age in years"
        calculate_age_between_dates(params)
      when "age in days"
        calculate_days_between(params)
      when "age in years and days"
        calculate_age_years_and_days(params)
      when "time ago"
        format_time_ago(params)

      # Convert template
      when "convert", "cvt"
        expand_convert(params)

      # Common templates
      when "circa", "c."
        expand_circa(params)
      when "floruit", "fl."
        expand_floruit(params)
      when "reign", "r."
        expand_reign(params)
      when "marriage", "married"
        expand_marriage(params)
      when "played years"
        expand_year_range(params)

      # Coordinate template
      when "coord", "coordinate", "coordinates"
        expand_coord(params)

      # Language templates
      when "lang"
        expand_lang(params)
      when "transl"
        expand_transl(params)
      when "nihongo"
        expand_nihongo(params)

      # Formatting templates (pass through text)
      when "nowrap", "nobr", "small", "smaller", "em", "bold", "strong",
           "abbr", "abbrlink", "sub", "sup"
        params[:positional][0] || ""
      when "blockquote", "quote", "cquote", "quotation"
        expand_blockquote(params)
      when "frac", "fraction", "sfrac"
        expand_fraction(params)
      when "wikt", "wiktionary"
        params[:positional][1] || params[:positional][0] || ""
      when "sic"
        "[sic]"
      when "as of"
        expand_as_of(params)
      else
        # Handle lang-xx templates (e.g., lang-fr, lang-de, lang-ja)
        if template_name.start_with?("lang-")
          expand_lang_xx(template_name, params)
        elsif @preserve_unknown
          "{{#{parts.join('|')}#{'|' if trailing_pipe}}}"
        else
          ""
        end
      end
    end

    # MediaWiki treats underscores as spaces and ignores case and repeated
    # whitespace in template names.
    def normalize_template_name(name)
      name.strip.tr("_", " ").squeeze(" ").downcase
    end

    # Splits a template body on "|" separators that are not nested inside
    # braces or brackets. A single trailing empty segment is dropped.
    def split_template_parts(content)
      parts = []
      current = +""
      depth = 0

      content.each_char do |char|
        case char
        when "{", "["
          depth += 1
        when "}", "]"
          # A stray closer must not push depth negative, or no later "|"
          # would ever be recognised as a separator.
          depth -= 1 if depth.positive?
        when "|"
          if depth.zero?
            parts << current
            current = +""
            next
          end
        end
        current << char
      end

      parts << current unless current.empty?
      parts
    end

    # Turns raw argument strings into a params hash.
    #
    # @param parts [Array<String>] arguments after the template name
    # @return [Hash] named arguments under their lower-cased String key and
    #   unnamed ones, in order, under +:positional+. Explicitly numbered
    #   arguments ("1=foo") are stored under both, so "{{nowrap|1=a=b}}"
    #   behaves like an unnamed first argument.
    def parse_template_params(parts)
      positional = []
      params = { positional: positional }
      next_unnamed = 0

      parts.each do |part|
        key, value = part.split("=", 2)
        name = key.to_s.strip

        # Only letters, digits, underscore and whitespace make a valid name;
        # anything else (markup, URLs, ...) is an unnamed value containing "=".
        if value.nil? || !name.match?(/\A[\w\s]+\z/)
          positional[next_unnamed] = part.strip
          next_unnamed += 1
          next
        end

        value = value.strip
        params[name.downcase] = value

        next unless name.match?(/\A[1-9]\d*\z/)

        index = name.to_i
        positional[index - 1] = value if index <= MAX_NUMBERED_PARAM
      end

      # Gaps left by numbered arguments become empty strings so callers can
      # keep treating every positional value as a String.
      positional.map! { |item| item || "" }
      params
    end

    # True when the template asked for day-first dates (df=yes / df=y).
    def day_first?(params)
      %w[yes y].include?(params["df"])
    end

    # Returns +value+ unless it is nil or empty.
    def presence(value)
      value unless value.nil? || value.empty?
    end

    # Reads [year, month, day] from three consecutive positional values,
    # replacing an out-of-range month or day with 1.
    def date_triple(pos, offset)
      [
        pos[offset].to_i,
        normalize_month(pos[offset + 1].to_i),
        normalize_day(pos[offset + 2].to_i)
      ]
    end

    # Date formatting methods

    # Formats {{birth date|Y|M|D}}-style templates.
    def format_date(params, style: :mdy)
      pos = params[:positional]
      return "" if pos.empty?

      format_date_parts(pos[0].to_i, pos[1]&.to_i, pos[2]&.to_i,
                        day_first?(params) ? :dmy : style)
    end

    # English month name for a 1-based month number, or nil when out of range.
    def month_name_for(month)
      number = month.to_i
      MONTH_NAMES[number - 1] if number.between?(1, 12)
    end

    # Renders as much of the date as is valid: year only, "Month Year", or the
    # full date in +style+ (:dmy or :mdy).
    def format_date_parts(year, month, day, style)
      month_name = month_name_for(month)
      return year.to_s unless month_name
      return "#{month_name} #{year}" unless day && day.positive?

      if style == :dmy
        "#{day} #{month_name} #{year}"
      else # :mdy
        "#{month_name} #{day}, #{year}"
      end
    end

    # Formats {{date|Y|M|D}} (always month-first).
    def format_simple_date(params)
      pos = params[:positional]
      return "" if pos.empty?

      format_date_parts(pos[0].to_i, pos[1]&.to_i, pos[2]&.to_i, :mdy)
    end

    # Formats {{birth date and age|Y|M|D}} as "<date> (<age_label> N)".
    # Only the supplied date parts are shown; the age is omitted when it
    # cannot be computed.
    def format_date_with_age(params, style: :mdy, age_label: "age")
      pos = params[:positional]
      return "" if pos.empty?

      year = pos[0].to_i
      month = pos[1]&.to_i
      day = pos[2]&.to_i

      date_str = format_date_parts(year, month, day, day_first?(params) ? :dmy : style)
      age = calculate_age_from_parts(year, month, day, @reference_date)

      age ? "#{date_str} (#{age_label} #{age})" : date_str
    end

    # Formats {{death date and age|dY|dM|dD|bY|bM|bD}} as "<date> (aged N)".
    def format_death_date_with_age(params)
      pos = params[:positional]
      return "" if pos.length < 6

      death_year, death_month, death_day = date_triple(pos, 0)
      birth_year, birth_month, birth_day = date_triple(pos, 3)

      date_str = format_date_parts(death_year, death_month, death_day,
                                   day_first?(params) ? :dmy : :mdy)
      death_date = Time.utc(death_year, death_month, death_day)
      age = calculate_age_from_parts(birth_year, birth_month, birth_day, death_date)

      age ? "#{date_str} (aged #{age})" : date_str
    rescue ArgumentError
      date_str.to_s
    end

    # Age calculation methods

    # {{age|Y|M|D}}: age in whole years at the reference date ("" if invalid).
    def calculate_age(params)
      pos = params[:positional]
      return "" if pos.empty?

      calculate_age_from_parts(pos[0].to_i, pos[1]&.to_i, pos[2]&.to_i, @reference_date).to_s
    end

    # {{birth year and age|Y}} -> "Y (age N-1–N)". With only a year known the
    # age is a two-year range.
    def expand_birth_year_and_age(params)
      pos = params[:positional]
      return "" if pos.empty?

      year = pos[0].to_i
      return pos[0] unless year.positive?

      with_age_range(year, @reference_date.year - year, "age")
    end

    # {{death year and age|death year|birth year}} -> "D (aged N-1–N)".
    def expand_death_year_and_age(params)
      pos = params[:positional]
      return "" if pos.empty?

      death_year = pos[0].to_i
      birth_year = pos[1].to_i
      return pos[0] unless death_year.positive? && birth_year.positive?

      with_age_range(death_year, death_year - birth_year, "aged")
    end

    # "<year> (<label> <upper-1>–<upper>)"; just the year for a negative age.
    def with_age_range(year, upper_age, label)
      return year.to_s if upper_age.negative?

      ages = upper_age.zero? ? "0" : "#{upper_age - 1}–#{upper_age}"
      "#{year} (#{label} #{ages})"
    end

    # Whole years between a birth date and +reference+.
    #
    # @param reference [#year, #month, #day] Time or Date to measure against
    # @return [Integer, nil] nil when the year is outside 1..9999
    def calculate_age_from_parts(year, month, day, reference)
      # Validate date components
      return nil unless year.is_a?(Integer) && year.positive? && year <= 9999

      month = 1 if month.nil? || month < 1 || month > 12
      day = 1 if day.nil? || day < 1 || day > 31

      birth = Time.utc(year, month, day)
      age = reference.year - birth.year

      # Adjust if the birthday hasn't occurred yet in the reference year
      before_birthday = reference.month < birth.month ||
                        (reference.month == birth.month && reference.day < birth.day)
      before_birthday ? age - 1 : age
    rescue ArgumentError
      nil
    end

    # {{age in years|bY|bM|bD|eY|eM|eD}}
    def calculate_age_between_dates(params)
      pos = params[:positional]
      return "" if pos.length < 6

      birth_year, birth_month, birth_day = date_triple(pos, 0)
      end_date = Time.utc(*date_triple(pos, 3))

      calculate_age_from_parts(birth_year, birth_month, birth_day, end_date).to_s
    rescue ArgumentError
      ""
    end

    # {{age in days|sY|sM|sD|eY|eM|eD}}
    def calculate_days_between(params)
      pos = params[:positional]
      return "" if pos.length < 6

      # UTC keeps every day exactly 86,400 s long; local time would lose a day
      # across a daylight-saving change.
      start_date = Time.utc(*date_triple(pos, 0))
      end_date = Time.utc(*date_triple(pos, 3))

      ((end_date - start_date) / SECONDS_PER_DAY).round.to_s
    rescue ArgumentError
      ""
    end

    # Returns +month+ when it is 1..12, otherwise 1.
    def normalize_month(month)
      return 1 if month < 1 || month > 12

      month
    end

    # Returns +day+ when it is 1..31, otherwise 1.
    def normalize_day(day)
      return 1 if day < 1 || day > 31

      day
    end

    # {{age in years and days|bY|bM|bD|eY|eM|eD}} -> "N years, M days"
    def calculate_age_years_and_days(params)
      pos = params[:positional]
      return "" if pos.length < 6

      birth_year, birth_month, birth_day = date_triple(pos, 0)
      end_year, end_month, end_day = date_triple(pos, 3)
      end_date = Time.utc(end_year, end_month, end_day)

      years = calculate_age_from_parts(birth_year, birth_month, birth_day, end_date)
      return "" unless years

      # Calculate days since last birthday
      last_birthday = Time.utc(end_year, birth_month, birth_day)
      last_birthday = Time.utc(end_year - 1, birth_month, birth_day) if last_birthday > end_date
      days = ((end_date - last_birthday) / SECONDS_PER_DAY).round

      "#{years} years, #{days} days"
    rescue ArgumentError
      ""
    end

    # {{time ago|Y|M|D}} -> "5 days ago", "1 year ago", or "2 days' time" for
    # a date after the reference date.
    def format_time_ago(params)
      pos = params[:positional]
      return "" if pos.empty?

      # Compare calendar dates in UTC so time of day, zone offset and DST
      # cannot shift the day count.
      target = Time.utc(pos[0].to_i, normalize_month(pos[1].to_i), normalize_day(pos[2].to_i))
      today = Time.utc(@reference_date.year, @reference_date.month, @reference_date.day)
      diff_days = ((today - target) / SECONDS_PER_DAY).round
      span = diff_days.abs

      count, unit =
        if span < 30
          [span, "day"]
        elsif span < 365
          [(span / 30.0).round, "month"]
        else
          [(span / 365.0).round, "year"]
        end

      if diff_days.negative?
        count == 1 ? "1 #{unit}'s time" : "#{count} #{unit}s' time"
      else
        "#{count} #{count == 1 ? unit : "#{unit}s"} ago"
      end
    rescue ArgumentError
      ""
    end

    # Convert template

    # {{convert|value|from|to}} -> "10 km (6.2 mi)". Without a target unit, or
    # for an unknown unit pair, only the source quantity is returned.
    def expand_convert(params)
      pos = params[:positional]
      return "" if pos.empty?

      # Drop thousands separators: "1,500".to_f would otherwise be 1.0
      value = pos[0].to_s.delete(",").to_f
      from_unit = pos[1]&.strip || ""
      to_unit = pos[2]&.strip || ""

      return "#{format_number(value)} #{from_unit}".strip if to_unit.empty?

      from_key = normalize_unit(from_unit)
      to_key = normalize_unit(to_unit)
      source_text = "#{format_number(value)} #{UNIT_DISPLAY[from_key] || from_unit}"

      conversion = CONVERSIONS[[from_key, to_key]]
      return source_text if conversion.nil?

      # A Symbol names a private conversion method (temperature); otherwise
      # the entry is a multiplication factor.
      converted = conversion.is_a?(Symbol) ? send(conversion, value) : value * conversion
      "#{source_text} (#{format_number(converted)} #{UNIT_DISPLAY[to_key] || to_unit})"
    end

    # Canonical lookup key for a unit: all whitespace removed.
    def normalize_unit(unit)
      unit.gsub(/\s+/, "")
    end

    # Formats a number with at most one decimal place, omitting ".0".
    def format_number(value)
      rounded = value.round(1)
      if rounded == rounded.to_i
        rounded.to_i.to_s
      else
        format("%.1f", rounded)
      end
    end

    def celsius_to_fahrenheit(c)
      (c * 9.0 / 5.0 + 32).round
    end

    def fahrenheit_to_celsius(f)
      ((f - 32) * 5.0 / 9.0).round
    end

    # Common template expansions

    # {{circa|1900}} -> "c. 1900"; two values give a range.
    def expand_circa(params)
      pos = params[:positional]
      return "" if pos.empty?

      pos.length >= 2 ? "c. #{pos[0]} – c. #{pos[1]}" : "c. #{pos[0]}"
    end

    # {{floruit|1200|1250}} -> "fl. 1200–1250"
    def expand_floruit(params)
      pos = params[:positional]
      return "" if pos.empty?

      pos.length >= 2 ? "fl. #{pos[0]}–#{pos[1]}" : "fl. #{pos[0]}"
    end

    # {{reign|1500|1520}} -> "r. 1500–1520"
    def expand_reign(params)
      pos = params[:positional]
      return "" if pos.length < 2

      "r. #{pos[0]}–#{pos[1]}"
    end

    # {{marriage|Name|start|end|reason=...}} -> "Name (m. start; div. end)".
    # Empty start/end values are skipped instead of leaving "m. " behind.
    def expand_marriage(params)
      pos = params[:positional]
      return "" if pos.empty?

      name = pos[0]
      start_year = presence(pos[1])
      end_year = presence(pos[2])
      return name.to_s unless start_year || end_year

      details = []
      details << "m. #{start_year}" if start_year

      if end_year
        end_abbr = case params["reason"]&.downcase
                   when "widowed", "wid" then "wid."
                   when "died", "d" then "d."
                   else "div."
                   end
        details << "#{end_abbr} #{end_year}"
      end

      "#{name} (#{details.join('; ')})"
    end

    # {{played years|1990|1995}} -> "1990–1995"
    def expand_year_range(params)
      pos = params[:positional]
      return "" if pos.length < 2

      "#{pos[0]}–#{pos[1]}"
    end

    # Coordinate template expansion.
    #
    # Supported layouts, after option arguments such as "type:city" or
    # "display:title" have been removed:
    #   {{coord|lat|lon}}                 decimal
    #   {{coord|lat|N/S|lon|E/W}}         decimal with direction
    #   {{coord|d|m|N/S|d|m|E/W}}         degrees and minutes
    #   {{coord|d|m|s|N/S|d|m|s|E/W}}     full DMS
    # Anything else is joined with commas.
    def expand_coord(params)
      coords = params[:positional].reject { |value| value.match?(COORD_OPTION_PATTERN) }
      return "" if coords.empty?

      case coords.length
      when 2
        "#{coords[0]}°, #{coords[1]}°"
      when 4
        lat, ns, lon, ew = coords
        if ns.match?(/\A[NS]\z/i) && ew.match?(/\A[EW]\z/i)
          "#{lat}° #{ns.upcase}, #{lon}° #{ew.upcase}"
        else
          coords.join(", ")
        end
      when 6
        "#{coords[0]}°#{coords[1]}′ #{coords[2].upcase}, " \
          "#{coords[3]}°#{coords[4]}′ #{coords[5].upcase}"
      when 8
        "#{coords[0]}°#{coords[1]}′#{coords[2]}″ #{coords[3].upcase}, " \
          "#{coords[4]}°#{coords[5]}′#{coords[6]}″ #{coords[7].upcase}"
      else
        coords.join(", ")
      end
    end

    # Appends the literal translation given via lit=, when it is non-empty.
    def with_literal(text, literal)
      presence(literal) ? "#{text} (lit. '#{literal}')" : text
    end

    # {{lang|code|text}} -> text
    def expand_lang(params)
      pos = params[:positional]
      return "" if pos.length < 2

      with_literal(pos[1] || "", params["lit"])
    end

    # {{lang-fr|text}} -> "French: text". Codes missing from LANGUAGE_NAMES
    # are shown upper-cased.
    def expand_lang_xx(template_name, params)
      pos = params[:positional]
      return "" if pos.empty?

      lang_code = template_name.sub("lang-", "")
      lang_name = LANGUAGE_NAMES[lang_code] || lang_code.upcase

      with_literal("#{lang_name}: #{pos[0] || ''}", params["lit"])
    end

    # {{transl|lang|text}} or {{transl|lang|scheme|text}} -> text.
    # The text is always the last unnamed argument.
    def expand_transl(params)
      pos = params[:positional]
      return "" if pos.empty?

      pos.last || ""
    end

    # {{nihongo|English|kanji|romaji}} -> "English (kanji, romaji)".
    # Empty components are skipped.
    def expand_nihongo(params)
      pos = params[:positional]
      return "" if pos.empty?

      english = pos[0] || ""
      extras = [pos[1], pos[2]].map { |item| presence(item) }.compact
      bracketed = extras.empty? ? "" : "(#{extras.join(', ')})"

      [english, bracketed].reject(&:empty?).join(" ")
    end

    # Blockquote template - extracts quoted text and optional source
    def expand_blockquote(params)
      pos = params[:positional]
      text = params["text"] || params["1"] || pos[0] || ""
      source = params["source"] || params["2"] || pos[1]

      quoted = text.strip
      presence(source) ? "#{quoted} — #{source}" : quoted
    end

    # Fraction template - formats fractions
    def expand_fraction(params)
      pos = params[:positional]
      return "" if pos.empty?

      case pos.length
      when 1
        # Just denominator: {{frac|2}} -> 1/2
        "1/#{pos[0]}"
      when 2
        # Numerator and denominator: {{frac|1|2}} -> 1/2
        "#{pos[0]}/#{pos[1]}"
      when 3
        # Whole, numerator, denominator: {{frac|1|1|4}} -> 1+1/4
        "#{pos[0]}+#{pos[1]}/#{pos[2]}"
      else
        pos.join("/")
      end
    end

    # {{as of|Y|M|D}} -> "As of May 3, 2020". A month outside 1..12 falls back
    # to the year alone (index 0 - 1 would otherwise select December).
    def expand_as_of(params)
      pos = params[:positional]
      return "" if pos.empty?

      year = pos[0]
      month_name = month_name_for(pos[1])
      day = presence(pos[2])

      if month_name && day
        "As of #{month_name} #{day}, #{year}"
      elsif month_name
        "As of #{month_name} #{year}"
      else
        "As of #{year}"
      end
    end
  end
end