wp2txt 0.7.5 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 911e08e181a6bedb664b797d49183d0988daeba5
4
- data.tar.gz: 076d1349a8aa8cf454dac42bdce7b89a82f3fca0
3
+ metadata.gz: b4dff862e52128851fb4db35a59a1f4e3e9e473f
4
+ data.tar.gz: e38612ada2785a2fed1e975460451b0d0a703e5e
5
5
  SHA512:
6
- metadata.gz: 4ebc035e4f1635f150294d8b79eb474457a280707a416688f3e7712bb7788d15888b6718bfd6f4e3a790e6fb8a7623e1415255fde913bfe658dd237fa7f599cd
7
- data.tar.gz: ccee00a9e1b85186d52d0b3c07b52c04fff1ecd133ff245010943312cf37e279874b5f3a757880c005ad877e957df6a4176af2269f40b3c3210951530eb4c511
6
+ metadata.gz: 19159a3706d0dfb687c239f3200ac3685916e63a4e3ec292d3e47acf4e8787c5ed728837bdf4b5586691adacdc75decddc1b2c7937ad483ca15a3663213d32c9
7
+ data.tar.gz: 8d956611e29eb4fa0d058ab6459bd96167352fdb5ee44b1819fe4688983a8ebf146cc2413e609029558d845bf9e93f9e27eaca9a35f34bf4644c234a4809146c
data/bin/wp2txt CHANGED
@@ -127,8 +127,9 @@ wpconv.extract_text do |article|
127
127
  if /\A\s*\z/m =~ contents
128
128
  result = ""
129
129
  else
130
- result = config[:title] ? title + "\n" + contents : contents
130
+ result = config[:title] ? title + "\n" << contents : contents
131
131
  end
132
- result = result.gsub(/\[ref\]\s*\[\/ref\]/m){""}
133
- result = result.gsub(/\n\n\n+/m){"\n\n"} + "\n"
132
+ result.gsub!(/\[ref\]\s*\[\/ref\]/m){""}
133
+ result.gsub!(/\n\n\n+/m){"\n\n"}
134
+ result << "\n"
134
135
  end
@@ -68,8 +68,6 @@ $list_marks_regex = Regexp.new('\A[\*\#\;\:\ ]+')
68
68
  $pre_marks_regex = Regexp.new('\A\^\ ')
69
69
  $def_marks_regex = Regexp.new('\A[\;\:\ ]+')
70
70
  $onset_bar_regex = Regexp.new('\A[^\|]+\z')
71
- # $remove_table_regex = Regexp.new('\{\|[^\{\|\}]*?\|\}', Regexp::MULTILINE)
72
- # $remove_clade_regex = Regexp.new('\{\{(?:C|c)lade[^\{\}]*\}\}', Regexp::MULTILINE)
73
71
 
74
72
  $category_patterns = ["Category", "Categoria"].join("|")
75
73
  $category_regex = Regexp.new('[\{\[\|\b](?:' + $category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
@@ -340,44 +338,7 @@ module Wp2txt
340
338
  end
341
339
  end
342
340
 
343
- #################### methods currently unused ####################
344
-
345
- # def process_template(str)
346
- # scanner = StringScanner.new(str)
347
- # result = process_nested_structure(scanner, "{{", "}}", $limit_recur) do |contents|
348
- # parts = contents.split("|")
349
- # case parts.size
350
- # when 0
351
- # ""
352
- # when 1
353
- # parts.first || ""
354
- # else
355
- # if parts.last.split("=").size > 1
356
- # parts.first || ""
357
- # else
358
- # parts.last || ""
359
- # end
360
- # end
361
- # end
362
- # result
363
- # end
364
-
365
- # def remove_table(str)
366
- # new_str = str.gsub($remove_table_regex, "")
367
- # if str != new_str
368
- # new_str = remove_table(new_str)
369
- # end
370
- # new_str = remove_table(new_str) unless str == new_str
371
- # return new_str
372
- # end
373
-
374
- # def remove_clade(page)
375
- # new_page = page.gsub($remove_clade_regex, "")
376
- # new_page = remove_clade(new_page) unless page == new_page
377
- # new_page
378
- # end
379
-
380
- #################### file related utilities ####################
341
+ #################### file related utilities ####################
381
342
 
382
343
  # collect filenames recursively
383
344
  def collect_files(str, regex = nil)
@@ -1,3 +1,3 @@
1
1
  module Wp2txt
2
- VERSION = "0.7.5"
2
+ VERSION = "0.7.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoichiro Hasebe
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  version: '0'
100
100
  requirements: []
101
101
  rubyforge_project: wp2txt
102
- rubygems_version: 2.4.4
102
+ rubygems_version: 2.4.3
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: Wikipedia dump to text converter