wp2txt 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -16,9 +16,6 @@ WP2TXT before version 0.4.0 came with Mac/Windows GUI. Now it's become a pure co
16
16
 
17
17
  ### Installation
18
18
 
19
- $ gem install bundler
20
- $ bundle install
21
-
22
19
  $ gem install wp2txt
23
20
 
24
21
  ### Usage
@@ -33,18 +30,20 @@ Command line options are as follows:
33
30
 
34
31
  Usage: wp2txt [options]
35
32
  where [options] are:
36
- --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt (uncompressed) format
37
- --output-dir, -o <s>: Output directory (default: current directory)
38
- --convert-off, -c: Output XML (without converting to plain text)
39
- --list-off, -l: Exclude list items from output
40
- --heading-off, -d: Exclude section titles from output
41
- --title-off, -t: Exclude page titles from output
42
- --table-off, -a: Exclude page titles from output (default: true)
43
- --template-off, -e: Remove multi-line template notations from output
44
- --strip-marker, -s: Remove symbols prefixed to list items, definitions, etc.
45
- --file-size, -f <i>: Approximate size (in MB) of each output file (default: 10)
46
- --version, -v: Print version and exit
47
- --help, -h: Show this message
33
+ --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt (uncompressed) format
34
+ --output-dir, -o <s>: Output directory (default: Present working directory)
35
+ --convert-off, -c: Output XML (without converting to plain text)
36
+ --list-off, -l: Exclude list items from output
37
+ --heading-off, -d: Exclude section titles from output
38
+ --title-off, -t: Exclude page titles from output
39
+ --table-off, --no-table-off, -a: Exclude tables from output (default: true)
40
+ --template-off, --no-template-off, -e: Remove template notations from output (default: true)
41
+ --redirect-off, -r: Not show redirect destination
42
+ --strip-marker, -s: Remove symbols prefixed to list items, definitions, etc.
43
+ --category-off, -g: Not show article category information
44
+ --file-size, -f <i>: Approximate size (in MB) of each output file (default: 10)
45
+ --version, -v: Print version and exit
46
+ --help, -h: Show this message
48
47
 
49
48
  ### Limitations ###
50
49
 
data/bin/wp2txt CHANGED
@@ -34,7 +34,7 @@ EOS
34
34
  opt :template_off, "Remove template notations from output", :default => true
35
35
  opt :redirect_off, "Not show redirect destination", :default => false
36
36
  opt :strip_marker, "Remove symbols prefixed to list items, definitions, etc.", :default => false
37
- opt :category_off, "Not output article category information", :default => false
37
+ opt :category_off, "Not show article category information", :default => false
38
38
  opt :file_size, "Approximate size (in MB) of each output file", :default => 10
39
39
  end
40
40
  Trollop::die :size, "must be larger than 0" unless opts[:file_size] >= 0
@@ -70,8 +70,9 @@ module Wp2txt
70
70
  @@blank_line_regex = Regexp.new('^\s*$')
71
71
 
72
72
  @@redirect_regex = Regexp.new('#(?:REDIRECT|転送)\s+\[\[(.+)\]\]', Regexp::IGNORECASE)
73
-
74
- @@category_regex = Regexp.new('[\{\[\|\b](?:C|c)ategory\:(.*?)[\}\]\|\b]')
73
+
74
+ category_patterns = ["Category", "Categoria"].join("|")
75
+ @@category_regex = Regexp.new('[\{\[\|\b](?:' + category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
75
76
 
76
77
  def initialize(text, title = "", strip_tmarker = false)
77
78
  @title = title.strip
@@ -1,3 +1,3 @@
1
1
  module Wp2txt
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-14 00:00:00.000000000 Z
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec