wp2txt 0.5.1 → 0.5.02

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -30,20 +30,25 @@ Command line options are as follows:
30
30
 
31
31
  Usage: wp2txt [options]
32
32
  where [options] are:
33
- --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt (uncompressed) format
34
- --output-dir, -o <s>: Output directory (default: Present working directory)
35
- --convert-off, -c: Output XML (without converting to plain text)
36
- --list-off, -l: Exclude list items from output
37
- --heading-off, -d: Exclude section titles from output
38
- --title-off, -t: Exclude page titles from output
39
- --table-off, --no-table-off, -a: Exclude tables from output (default: true)
40
- --template-off, --no-template-off, -e: Remove template notations from output (default: true)
41
- --redirect-off, -r: Do not show redirect destination
42
- --strip-marker, -s: Remove symbols prefixed to list items, definitions, etc.
43
- --category-off, -g: Do not show article category information
44
- --file-size, -f <i>: Approximate size (in MB) of each output file (default: 10)
45
- --version, -v: Print version and exit
46
- --help, -h: Show this message
33
+ --input-file, -i: Wikipedia dump file with .bz2 (compressed) or .txt
34
+ (uncompressed) format
35
+ --output-dir, -o <s>: Output directory (default:
36
+ present working directory)
37
+ --convert-off, -c: Output XML (without converting to plain text)
38
+ --list-off, -l: Exclude list items from output
39
+ --heading-off, -d: Exclude section titles from output
40
+ --title-off, -t: Exclude page titles from output
41
+ --table-off, --no-table-off, -a: Exclude tables from output (default: true)
42
+ --template-off, --no-template-off, -e: Remove template notations from output (default:
43
+ true)
44
+ --redirect-off, -r: Do not show redirect destination
45
+ --strip-marker, -s: Remove symbols prefixed to list items, definitions,
46
+ etc.
47
+ --category-off, -g: Do not show article category information
48
+ --file-size, -f <i>: Approximate size (in MB) of each output file
49
+ (default: 10)
50
+ --version, -v: Print version and exit
51
+ --help, -h: Show this message
47
52
 
48
53
  ### Limitations ###
49
54
 
@@ -70,9 +70,8 @@ module Wp2txt
70
70
  @@blank_line_regex = Regexp.new('^\s*$')
71
71
 
72
72
  @@redirect_regex = Regexp.new('#(?:REDIRECT|転送)\s+\[\[(.+)\]\]', Regexp::IGNORECASE)
73
-
74
- category_patterns = ["Category", "Categoria"].join("|")
75
- @@category_regex = Regexp.new('[\{\[\|\b](?:' + category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
73
+
74
+ @@category_regex = Regexp.new('[\{\[\|\b](?:C|c)ategory\:(.*?)[\}\]\|\b]')
76
75
 
77
76
  def initialize(text, title = "", strip_tmarker = false)
78
77
  @title = title.strip
@@ -1,3 +1,3 @@
1
1
  module Wp2txt
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.02"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.02
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-16 00:00:00.000000000 Z
12
+ date: 2013-01-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec