wp2txt 0.7.5 → 0.7.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 911e08e181a6bedb664b797d49183d0988daeba5
4
- data.tar.gz: 076d1349a8aa8cf454dac42bdce7b89a82f3fca0
3
+ metadata.gz: b4dff862e52128851fb4db35a59a1f4e3e9e473f
4
+ data.tar.gz: e38612ada2785a2fed1e975460451b0d0a703e5e
5
5
  SHA512:
6
- metadata.gz: 4ebc035e4f1635f150294d8b79eb474457a280707a416688f3e7712bb7788d15888b6718bfd6f4e3a790e6fb8a7623e1415255fde913bfe658dd237fa7f599cd
7
- data.tar.gz: ccee00a9e1b85186d52d0b3c07b52c04fff1ecd133ff245010943312cf37e279874b5f3a757880c005ad877e957df6a4176af2269f40b3c3210951530eb4c511
6
+ metadata.gz: 19159a3706d0dfb687c239f3200ac3685916e63a4e3ec292d3e47acf4e8787c5ed728837bdf4b5586691adacdc75decddc1b2c7937ad483ca15a3663213d32c9
7
+ data.tar.gz: 8d956611e29eb4fa0d058ab6459bd96167352fdb5ee44b1819fe4688983a8ebf146cc2413e609029558d845bf9e93f9e27eaca9a35f34bf4644c234a4809146c
data/bin/wp2txt CHANGED
@@ -127,8 +127,9 @@ wpconv.extract_text do |article|
127
127
  if /\A\s*\z/m =~ contents
128
128
  result = ""
129
129
  else
130
- result = config[:title] ? title + "\n" + contents : contents
130
+ result = config[:title] ? title + "\n" << contents : contents
131
131
  end
132
- result = result.gsub(/\[ref\]\s*\[\/ref\]/m){""}
133
- result = result.gsub(/\n\n\n+/m){"\n\n"} + "\n"
132
+ result.gsub!(/\[ref\]\s*\[\/ref\]/m){""}
133
+ result.gsub!(/\n\n\n+/m){"\n\n"}
134
+ result << "\n"
134
135
  end
@@ -68,8 +68,6 @@ $list_marks_regex = Regexp.new('\A[\*\#\;\:\ ]+')
68
68
  $pre_marks_regex = Regexp.new('\A\^\ ')
69
69
  $def_marks_regex = Regexp.new('\A[\;\:\ ]+')
70
70
  $onset_bar_regex = Regexp.new('\A[^\|]+\z')
71
- # $remove_table_regex = Regexp.new('\{\|[^\{\|\}]*?\|\}', Regexp::MULTILINE)
72
- # $remove_clade_regex = Regexp.new('\{\{(?:C|c)lade[^\{\}]*\}\}', Regexp::MULTILINE)
73
71
 
74
72
  $category_patterns = ["Category", "Categoria"].join("|")
75
73
  $category_regex = Regexp.new('[\{\[\|\b](?:' + $category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
@@ -340,44 +338,7 @@ module Wp2txt
340
338
  end
341
339
  end
342
340
 
343
- #################### methods currently unused ####################
344
-
345
- # def process_template(str)
346
- # scanner = StringScanner.new(str)
347
- # result = process_nested_structure(scanner, "{{", "}}", $limit_recur) do |contents|
348
- # parts = contents.split("|")
349
- # case parts.size
350
- # when 0
351
- # ""
352
- # when 1
353
- # parts.first || ""
354
- # else
355
- # if parts.last.split("=").size > 1
356
- # parts.first || ""
357
- # else
358
- # parts.last || ""
359
- # end
360
- # end
361
- # end
362
- # result
363
- # end
364
-
365
- # def remove_table(str)
366
- # new_str = str.gsub($remove_table_regex, "")
367
- # if str != new_str
368
- # new_str = remove_table(new_str)
369
- # end
370
- # new_str = remove_table(new_str) unless str == new_str
371
- # return new_str
372
- # end
373
-
374
- # def remove_clade(page)
375
- # new_page = page.gsub($remove_clade_regex, "")
376
- # new_page = remove_clade(new_page) unless page == new_page
377
- # new_page
378
- # end
379
-
380
- #################### file related utilities ####################
341
+ #################### file related utilities ####################
381
342
 
382
343
  # collect filenames recursively
383
344
  def collect_files(str, regex = nil)
@@ -1,3 +1,3 @@
1
1
  module Wp2txt
2
- VERSION = "0.7.5"
2
+ VERSION = "0.7.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoichiro Hasebe
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  version: '0'
100
100
  requirements: []
101
101
  rubyforge_project: wp2txt
102
- rubygems_version: 2.4.4
102
+ rubygems_version: 2.4.3
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: Wikipedia dump to text converter