wp2txt 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -16,9 +16,6 @@ WP2TXT before version 0.4.0 came with Mac/Windows GUI. Now it's become a pure co
16
16
 
17
17
  ### Installation
18
18
 
19
- $ gem install bundler
20
- $ bundle install
21
-
22
19
  $ gem install wp2txt
23
20
 
24
21
  ### Usage
@@ -33,18 +30,20 @@ Command line options are as follows:
33
30
 
34
31
  Usage: wp2txt [options]
35
32
  where [options] are:
36
- --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt (uncompressed) format
37
- --output-dir, -o <s>: Output directory (default: current directory)
38
- --convert-off, -c: Output XML (without converting to plain text)
39
- --list-off, -l: Exclude list items from output
40
- --heading-off, -d: Exclude section titles from output
41
- --title-off, -t: Exclude page titles from output
42
- --table-off, -a: Exclude page titles from output (default: true)
43
- --template-off, -e: Remove multi-line template notations from output
44
- --strip-marker, -s: Remove symbols prefixed to list items, definitions, etc.
45
- --file-size, -f <i>: Approximate size (in MB) of each output file (default: 10)
46
- --version, -v: Print version and exit
47
- --help, -h: Show this message
33
+ --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt (uncompressed) format
34
+ --output-dir, -o <s>: Output directory (default: Present working directory)
35
+ --convert-off, -c: Output XML (without converting to plain text)
36
+ --list-off, -l: Exclude list items from output
37
+ --heading-off, -d: Exclude section titles from output
38
+ --title-off, -t: Exclude page titles from output
39
+ --table-off, --no-table-off, -a: Exclude tables from output (default: true)
40
+ --template-off, --no-template-off, -e: Remove template notations from output (default: true)
41
+ --redirect-off, -r: Not show redirect destination
42
+ --strip-marker, -s: Remove symbols prefixed to list items, definitions, etc.
43
+ --category-off, -g: Not show article category information
44
+ --file-size, -f <i>: Approximate size (in MB) of each output file (default: 10)
45
+ --version, -v: Print version and exit
46
+ --help, -h: Show this message
48
47
 
49
48
  ### Limitations ###
50
49
 
data/bin/wp2txt CHANGED
@@ -34,7 +34,7 @@ EOS
34
34
  opt :template_off, "Remove template notations from output", :default => true
35
35
  opt :redirect_off, "Not show redirect destination", :default => false
36
36
  opt :strip_marker, "Remove symbols prefixed to list items, definitions, etc.", :default => false
37
- opt :category_off, "Not output article category information", :default => false
37
+ opt :category_off, "Not show article category information", :default => false
38
38
  opt :file_size, "Approximate size (in MB) of each output file", :default => 10
39
39
  end
40
40
  Trollop::die :size, "must be larger than 0" unless opts[:file_size] >= 0
@@ -70,8 +70,9 @@ module Wp2txt
70
70
  @@blank_line_regex = Regexp.new('^\s*$')
71
71
 
72
72
  @@redirect_regex = Regexp.new('#(?:REDIRECT|転送)\s+\[\[(.+)\]\]', Regexp::IGNORECASE)
73
-
74
- @@category_regex = Regexp.new('[\{\[\|\b](?:C|c)ategory\:(.*?)[\}\]\|\b]')
73
+
74
+ category_patterns = ["Category", "Categoria"].join("|")
75
+ @@category_regex = Regexp.new('[\{\[\|\b](?:' + category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
75
76
 
76
77
  def initialize(text, title = "", strip_tmarker = false)
77
78
  @title = title.strip
@@ -1,3 +1,3 @@
1
1
  module Wp2txt
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-14 00:00:00.000000000 Z
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec