wp2txt 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bb540f4f17f7825786d110245c235ac556e3e64cedb17efae3e0591887425801
4
- data.tar.gz: 479c357f7ba117ae10d9a5a04d24ce3aca2e54d942a156b02eb932c1aab55c8b
3
+ metadata.gz: 62f1e8d6ab1932f3ae3c34fb71930b7e73500c832481dcea6288742c38850a79
4
+ data.tar.gz: f0ff0a5488b635b828338d41029c5ad191a0c88282c0fa294a9facf2d93c055b
5
5
  SHA512:
6
- metadata.gz: 940d47d2c8bce06029fe76e3b3744563d089e26e297e5224b36e65d815295da57117eae84cbb43abeddf2f2c052e2a987d668cba52c7af6148e935b571b6d403
7
- data.tar.gz: 8ce76523a3bf181ac7a5da11f088dd14cfb1e1d7ac0d5239832db52968d183db16a3ece6074513b634eebe0e5ca28ceea945eaef6542ecb1933266caf4e89a3c
6
+ metadata.gz: 7bca85758e88d53dcd33fe43e83a251624f89329a2cb55ffb97b41141bcf8fe5ace7c48e3b8e49f5aa42f84724247cfe4ad376238a949e9154876d4d07469afe
7
+ data.tar.gz: de59399d5163afed2947e0802abf2e0365894d566c8a1f11823bc901d4948346e7af47d6fba558387f5af7e1301a6725a51a322ac1cd4810264dc3003e0729e2
data/.dockerignore ADDED
@@ -0,0 +1,8 @@
1
+ .git
2
+ .github
3
+ image
4
+ pkg
5
+ spec
6
+ .dockerignore
7
+ .gitignore
8
+ Gemfile.lock
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+ workflow_dispatch:
7
+ schedule:
8
+ - cron: '42 5 * * *'
9
+
10
+ jobs:
11
+ test:
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ ruby: [ '3.1' ]
16
+
17
+ runs-on: ubuntu-latest
18
+ name: Ruby ${{matrix.ruby}}
19
+ container: ruby:${{matrix.ruby}}
20
+
21
+ steps:
22
+ - uses: actions/checkout@v3
23
+
24
+ - name: Show Ruby Version
25
+ run: ruby -v
26
+
27
+ - name: Install dependencies
28
+ run: bundle install
29
+
30
+ - name: Install rspec
31
+ run: gem install rspec
32
+
33
+ - name: Run tests
34
+ run: rspec
35
+
36
+
data/.gitignore CHANGED
@@ -18,4 +18,4 @@ tmp
18
18
  .DS_Store
19
19
  *.bak
20
20
  *.~
21
-
21
+ tags
data/.rubocop.yml ADDED
@@ -0,0 +1,80 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/AccessorMethodName:
10
+ Enabled: false
11
+
12
+ Naming/VariableNumber:
13
+ Enabled: false
14
+
15
+ Naming/FileName:
16
+ Enabled: false
17
+
18
+ Security/MarshalLoad:
19
+ Enabled: false
20
+
21
+ Security/Open:
22
+ Enabled: false
23
+
24
+ Layout/EndOfLine:
25
+ Enabled: False
26
+
27
+ Style/FormatStringToken:
28
+ Enabled: false
29
+
30
+ Style/ClassVars:
31
+ Enabled: false
32
+
33
+ Style/OptionalBooleanParameter:
34
+ Enabled: false
35
+
36
+ Style/StringConcatenation:
37
+ Enabled: false
38
+
39
+ Style/PerlBackrefs:
40
+ Enabled: false
41
+
42
+ Style/StringLiterals:
43
+ Enabled: false
44
+
45
+ Style/StringLiteralsInInterpolation:
46
+ Enabled: true
47
+ EnforcedStyle: double_quotes
48
+
49
+ Style/WordArray:
50
+ Enabled: false
51
+
52
+ Style/EvalWithLocation:
53
+ Enabled: false
54
+
55
+ Layout/LineLength:
56
+ Max: 400
57
+
58
+ Metrics/MethodLength:
59
+ Max: 200
60
+
61
+ Metrics/BlockLength:
62
+ Max: 200
63
+
64
+ Metrics/AbcSize:
65
+ Max: 200
66
+
67
+ Metrics/PerceivedComplexity:
68
+ Max: 60
69
+
70
+ Metrics/ClassLength:
71
+ Max: 800
72
+
73
+ Metrics/CyclomaticComplexity:
74
+ Max: 60
75
+
76
+ Metrics/ParameterLists:
77
+ Max: 8
78
+
79
+ Metrics/ModuleLength:
80
+ Max: 600
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/Dockerfile ADDED
@@ -0,0 +1,20 @@
1
+ FROM ruby:3.1.3-alpine3.17
2
+
3
+ WORKDIR /wp2txt
4
+ COPY . ./
5
+ RUN rm -Rf wp2txt/Gemfile.lock
6
+
7
+ RUN apk update && \
8
+ apk upgrade && \
9
+ apk add --no-cache linux-headers libxml2-dev make gcc libc-dev bash && \
10
+ apk add --no-cache -t .build-packages --no-cache build-base curl-dev wget gcompat && \
11
+ bundle install -j4
12
+
13
+ RUN wget https://fossies.org/linux/privat/lbzip2-2.5.tar.gz -O lbzip2.tar.gz && \
14
+ tar -xvf lbzip2.tar.gz && cd lbzip2-2.5 && \
15
+ bash configure && make && make install && \
16
+ cd .. && rm -rf lbzip2*
17
+
18
+ WORKDIR /
19
+ ENV PATH $PATH:/wp2txt/bin
20
+ CMD ["bash"]
data/Gemfile CHANGED
@@ -1,4 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "http://rubygems.org"
2
4
 
3
- # Specify your gem's dependencies in wp2txt.gemspec
4
- gemspec
5
+ gem "htmlentities"
6
+ gem "nokogiri"
7
+ gem "optimist"
8
+ gem "parallel"
9
+ gem "pastel"
10
+ gem "ruby-progressbar"
11
+ gem "tty-spinner"
data/README.md CHANGED
@@ -8,6 +8,10 @@ WP2TXT extracts text and category data from Wikipedia dump files (encoded in XML
8
8
 
9
9
  ## Changelog
10
10
 
11
+ **December 2022**
12
+
13
+ - Docker images available via Docker Hub
14
+
11
15
  **November 2022**
12
16
 
13
17
  - Code added to suppress "Invalid byte sequence error" when an illegal UTF-8 character is input.
@@ -38,9 +42,28 @@ In the above environment, the process (decompression, splitting, extraction, and
38
42
  - Allows extracting category information of the article
39
43
  - Allows extracting opening paragraphs of the article
40
44
 
41
- ## Preparation
45
+ ## Setting Up
46
+
47
+ ### WP2TXT on Docker
48
+
49
+ 1. Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) (Mac/Windows/Linux)
50
+ 2. Execute `docker` command in a terminal:
51
+
52
+ ```shell
53
+ docker run -it -v /Users/me/localdata:/data yohasebe/wp2txt
54
+ ```
55
+
56
+ - Make sure to replace `/Users/me/localdata` with the full path to the data directory on your local computer
57
+
58
+ 3. The Docker image will begin downloading and a bash prompt will appear when finished.
59
+ 4. The `wp2txt` command will be available anywhere in the Docker container. Use the `/data` directory as the location of the input dump files and the output text files.
60
+
61
+ **IMPORTANT:**
62
+
63
+ - Configure Docker Desktop resource settings (number of cores, amount of memory, etc.) to get the best performance possible.
64
+ - When running the `wp2txt` command inside a Docker container, be sure to set the output directory to somewhere in the mounted local directory specified by the `docker run` command.
42
65
 
43
- ### For MacOS and Linux
66
+ ### WP2TXT on MacOS and Linux
44
67
 
45
68
  WP2TXT requires that one of the following commands be installed on the system in order to decompress `bz2` files:
46
69
 
@@ -54,7 +77,7 @@ If you are using MacOS with Homebrew installed, you can install `lbzip2` with th
54
77
 
55
78
  $ brew install lbzip2
56
79
 
57
- ### For Windows
80
+ ### WP2TXT on Windows
58
81
 
59
82
  Install [Bzip2 for Windows](http://gnuwin32.sourceforge.net/packages/bzip2.htm) and set the path so that WP2TXT can use the bunzip2.exe command. Alternatively, you can extract the Wikipedia dump file in your own way and process the resulting XML file with WP2TXT.
60
83
 
data/Rakefile CHANGED
@@ -1,9 +1,30 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
- require 'rspec/core'
3
- require 'rspec/core/rake_task'
4
+ require "rspec/core"
5
+ require "rspec/core/rake_task"
6
+ require_relative "./lib/wp2txt/version"
7
+
8
+ class String
9
+ def strip_heredoc
10
+ gsub(/^#{scan(/^[ \t]*(?=\S)/).min}/, "")
11
+ end
12
+ end
4
13
 
5
14
  RSpec::Core::RakeTask.new(:spec) do |spec|
6
- spec.pattern = FileList['spec/**/*_spec.rb']
15
+ spec.pattern = FileList["spec/**/*_spec.rb"]
7
16
  end
8
17
 
9
- task :default => :spec
18
+ task default: :spec
19
+
20
+ desc "Push Docker images"
21
+ task :push do
22
+ sh <<-SCRIPT.strip_heredoc, { verbose: false }
23
+ /bin/bash -xeu <<'BASH'
24
+ # docker buildx create --name mybuilder
25
+ # docker buildx use mybuilder
26
+ # docker buildx inspect --bootstrap
27
+ docker buildx build --platform linux/amd64,linux/arm64 -t yohasebe/wp2txt:#{Wp2txt::VERSION} -t yohasebe/wp2txt:latest . --push
28
+ BASH
29
+ SCRIPT
30
+ end
data/bin/wp2txt CHANGED
@@ -1,197 +1,192 @@
1
1
  #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- $: << File.join(File.dirname(__FILE__))
5
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
6
-
7
- $DEBUG_MODE = false
8
- SHAREDIR = File.join(File.dirname(__FILE__), '..', 'share')
9
- DOCDIR = File.join(File.dirname(__FILE__), '..', 'doc')
10
-
11
- require 'wp2txt'
12
- require 'wp2txt/utils'
13
- require 'wp2txt/version'
14
- require 'etc'
15
- require 'optimist'
16
- require 'parallel'
17
- require 'pastel'
18
- require 'tty-spinner'
19
-
20
- include Wp2txt
21
-
22
- opts = Optimist::options do
23
- version Wp2txt::VERSION
24
- banner <<-EOS
25
- WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
26
-
27
- Usage: wp2txt [options]
28
- where [options] are:
29
- EOS
30
-
31
- opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", :required => true, :short => "-i"
32
- opt :output_dir, "Path to output directory", :default => Dir::pwd, :type => String, :short => "-o"
33
- opt :convert, "Output in plain text (converting from XML)", :default => true, :short => "-c"
34
- opt :category, "Show article category information", :default => true, :short => "-a"
35
- opt :category_only, "Extract only article title and categories", :default => false, :short => "-g"
36
- opt :summary_only, "Extract only article title, categories, and summary text before first heading", :default => false, :short => "-s"
37
- opt :file_size, "Approximate size (in MB) of each output file", :default => 10, :short => "-f"
38
- opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", :short => "-n"
39
- opt :del_interfile, "Delete intermediate XML files from output dir", :short => "-x", :default => false
40
- opt :title, "Keep page titles in output", :default => true, :short => "-t"
41
- opt :heading, "Keep section titles in output", :default => true, :short => "-d"
42
- opt :list, "Keep unprocessed list items in output", :default => false, :short => "-l"
43
- opt :ref, "Keep reference notations in the format [ref]...[/ref]", :default => false, :short => "-r"
44
- opt :redirect, "Show redirect destination", :default => false, :short => "-e"
45
- opt :marker, "Show symbols prefixed to list items, definitions, etc.", :default => true, :short => "-m"
46
- opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", :default => false, :short => "-b"
47
- end
48
-
49
- Optimist::die :size, "must be larger than 0" unless opts[:file_size] >= 0
50
- Optimist::die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
51
-
52
- pastel = Pastel.new
53
2
 
54
- input_file = ARGV[0]
55
- output_dir = opts[:output_dir]
56
- tfile_size = opts[:file_size]
57
- num_processors = Etc.nprocessors
58
- if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
59
- num_processes = opts[:num_procs]
60
- else
61
- num_processes = num_processors - 2
62
- end
63
- num_processes = 1 if num_processes < 1
64
-
65
- convert = opts[:convert]
66
- strip_tmarker = opts[:marker] ? false : true
67
- opt_array = [:title,
68
- :list,
69
- :heading,
70
- :table,
71
- :redirect,
72
- :multiline,
73
- :category,
74
- :category_only,
75
- :summary_only,
76
- :del_interfile,
77
- :bz2_gem ]
78
-
79
- $leave_inline_template = true if opts[:inline]
80
- $leave_ref = true if opts[:ref]
81
-
82
- config = {}
83
- opt_array.each do |opt|
84
- config[opt] = opts[opt]
85
- end
3
+ # frozen_string_literal: true
4
+
5
+ DEBUG_MODE = false
6
+ SHAREDIR = File.join(File.dirname(__FILE__), "..", "share")
7
+ DOCDIR = File.join(File.dirname(__FILE__), "..", "doc")
8
+
9
+ require_relative "../lib/wp2txt"
10
+ require_relative "../lib/wp2txt/utils"
11
+ require_relative "../lib/wp2txt/version"
12
+
13
+ require "etc"
14
+ require "optimist"
15
+ require "parallel"
16
+ require "pastel"
17
+ require "tty-spinner"
18
+
19
+ class WpApp
20
+ include Wp2txt
21
+
22
+ def run
23
+ opts = Optimist.options do
24
+ version VERSION
25
+ banner <<~BANNER
26
+ WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
27
+
28
+ Usage: wp2txt [options]
29
+ where [options] are:
30
+ BANNER
31
+
32
+ opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", type: String, required: true, short: "-i"
33
+ opt :output_dir, "Path to output directory", default: Dir.pwd, type: String, short: "-o"
34
+ opt :convert, "Output in plain text (converting from XML)", default: true, short: "-c"
35
+ opt :category, "Show article category information", default: true, short: "-a"
36
+ opt :category_only, "Extract only article title and categories", default: false, short: "-g"
37
+ opt :summary_only, "Extract only article title, categories, and summary text before first heading", default: false, short: "-s"
38
+ opt :file_size, "Approximate size (in MB) of each output file", default: 10, short: "-f"
39
+ opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", short: "-n"
40
+ opt :del_interfile, "Delete intermediate XML files from output dir", short: "-x", default: false
41
+ opt :title, "Keep page titles in output", default: true, short: "-t"
42
+ opt :heading, "Keep section titles in output", default: true, short: "-d"
43
+ opt :list, "Keep unprocessed list items in output", default: false, short: "-l"
44
+ opt :ref, "Keep reference notations in the format [ref]...[/ref]", default: false, short: "-r"
45
+ opt :redirect, "Show redirect destination", default: false, short: "-e"
46
+ opt :marker, "Show symbols prefixed to list items, definitions, etc.", default: true, short: "-m"
47
+ opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", default: false, short: "-b"
48
+ end
86
49
 
87
- if File::ftype(input_file) == "directory"
88
- input_files = Dir.glob("#{input_file}/*.xml")
89
- else
90
- puts ""
91
- puts pastel.green.bold("Preprocessing")
92
- puts "Decompressing and splitting the original dump file."
93
- puts pastel.underline("This may take a while. Please be patient!")
94
-
95
- time_start = Time.now.to_i
96
- wpsplitter = Wp2txt::Splitter.new(input_file, output_dir, tfile_size)
97
- spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
98
- spinner.auto_spin
99
- wpsplitter.split_file
100
- time_finish = Time.now.to_i
101
-
102
- spinner.stop("Time: #{sec_to_str(time_finish - time_start)}")# Stop animation
103
- puts pastel.blue.bold("Complete!")
104
- exit if !convert
105
- input_files = Dir.glob("#{output_dir}/*.xml")
106
- end
50
+ Optimist.die :size, "must be larger than 0" unless opts[:file_size] >= 0
51
+ Optimist.die :input, "must exist" unless File.exist?(opts[:input])
52
+ Optimist.die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
53
+
54
+ pastel = Pastel.new
55
+
56
+ input_file = opts[:input]
57
+ output_dir = opts[:output_dir]
58
+ tfile_size = opts[:file_size]
59
+ num_processors = Etc.nprocessors
60
+ num_processes = if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
61
+ opts[:num_procs]
62
+ else
63
+ num_processors - 2
64
+ end
65
+ num_processes = 1 if num_processes < 1
66
+
67
+ convert = opts[:convert]
68
+ strip_tmarker = opts[:marker] ? false : true
69
+ opt_array = %i[title list heading table redirect multiline category category_only summary_only del_interfile bz2_gem]
70
+
71
+ config = {}
72
+ opt_array.each do |opt|
73
+ config[opt] = opts[opt]
74
+ end
107
75
 
108
- puts ""
109
- puts pastel.red.bold("Converting")
110
- puts "Number of files being processed: " + pastel.bold("#{input_files.size}")
111
- puts "Number of CPU cores being used: " + pastel.bold("#{num_processes}")
112
-
113
- Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |input_file|
114
- wpconv = Wp2txt::Runner.new(input_file, output_dir, strip_tmarker, config[:del_interfile])
115
- wpconv.extract_text do |article|
116
- format_wiki!(article.title)
117
-
118
- if config[:category_only]
119
- title = "#{article.title}\t"
120
- contents = article.categories.join(", ")
121
- contents << "\n"
122
- elsif config[:category] && !article.categories.empty?
123
- title = "\n[[#{article.title}]]\n\n"
124
- contents = "\nCATEGORIES: "
125
- contents << article.categories.join(", ")
126
- contents << "\n\n"
76
+ if File.ftype(input_file) == "directory"
77
+ input_files = Dir.glob("#{input_file}/*.xml")
127
78
  else
128
- title = "\n[[#{article.title}]]\n\n"
129
- contents = ""
79
+ puts ""
80
+ puts pastel.green.bold("Preprocessing")
81
+ puts "Decompressing and splitting the original dump file."
82
+ puts pastel.underline("This may take a while. Please be patient!")
83
+
84
+ time_start = Time.now.to_i
85
+ wpsplitter = Splitter.new(input_file, output_dir, tfile_size)
86
+ spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
87
+ spinner.auto_spin
88
+ wpsplitter.split_file
89
+ time_finish = Time.now.to_i
90
+
91
+ spinner.stop("Time: #{sec_to_str(time_finish - time_start)}") # Stop animation
92
+ puts pastel.blue.bold("Complete!")
93
+ exit unless convert
94
+ input_files = Dir.glob("#{output_dir}/*.xml")
130
95
  end
131
96
 
132
- unless config[:category_only]
133
- article.elements.each do |e|
134
- case e.first
135
- when :mw_heading
136
- break if config[:summary_only]
137
- next if !config[:heading]
138
- format_wiki!(e.last)
139
- line = e.last
140
- line << "+HEADING+" if $DEBUG_MODE
141
- when :mw_paragraph
142
- format_wiki!(e.last)
143
- line = e.last + "\n"
144
- line << "+PARAGRAPH+" if $DEBUG_MODE
145
- when :mw_table, :mw_htable
146
- next if !config[:table]
147
- line = e.last
148
- line << "+TABLE+" if $DEBUG_MODE
149
- when :mw_pre
150
- next if !config[:pre]
151
- line = e.last
152
- line << "+PRE+" if $DEBUG_MODE
153
- when :mw_quote
154
- line = e.last
155
- line << "+QUOTE+" if $DEBUG_MODE
156
- when :mw_unordered, :mw_ordered, :mw_definition
157
- next if !config[:list]
158
- line = e.last
159
- line << "+LIST+" if $DEBUG_MODE
160
- when :mw_ml_template
161
- next if !config[:multiline]
162
- line = e.last
163
- line << "+MLTEMPLATE+" if $DEBUG_MODE
164
- when :mw_redirect
165
- next if !config[:redirect]
166
- line = e.last
167
- line << "+REDIRECT+" if $DEBUG_MODE
168
- line << "\n\n"
169
- when :mw_isolated_template
170
- next if !config[:multiline]
171
- line = e.last
172
- line << "+ISOLATED_TEMPLATE+" if $DEBUG_MODE
173
- when :mw_isolated_tag
174
- next
97
+ puts ""
98
+ puts pastel.red.bold("Converting")
99
+ puts "Number of files being processed: " + pastel.bold(input_files.size.to_s)
100
+ puts "Number of CPU cores being used: " + pastel.bold(num_processes.to_s)
101
+
102
+ Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |infile|
103
+ wpconv = Runner.new(infile, output_dir, strip_tmarker, config[:del_interfile])
104
+ wpconv.extract_text do |article|
105
+ article.title = format_wiki(article.title, config)
106
+
107
+ if config[:category_only]
108
+ title = "#{article.title}\t"
109
+ contents = article.categories.join(", ")
110
+ contents << "\n"
111
+ elsif config[:category] && !article.categories.empty?
112
+ title = "\n[[#{article.title}]]\n\n"
113
+ contents = +"\nCATEGORIES: "
114
+ contents << article.categories.join(", ")
115
+ contents << "\n\n"
175
116
  else
176
- if $DEBUG_MODE
177
- # format_wiki!(e.last)
178
- line = e.last
179
- line << "+OTHER+"
180
- else
181
- next
117
+ title = "\n[[#{article.title}]]\n\n"
118
+ contents = +""
119
+ end
120
+
121
+ unless config[:category_only]
122
+ article.elements.each do |e|
123
+ case e.first
124
+ when :mw_heading
125
+ break if config[:summary_only]
126
+ next unless config[:heading]
127
+
128
+ e[-1] = format_wiki(e.last, config)
129
+ line = e.last
130
+ line << "+HEADING+" if DEBUG_MODE
131
+ when :mw_paragraph
132
+ e[-1] = format_wiki(e.last, config)
133
+ line = e.last + "\n"
134
+ line << "+PARAGRAPH+" if DEBUG_MODE
135
+ when :mw_table, :mw_htable
136
+ next unless config[:table]
137
+
138
+ line = e.last
139
+ line << "+TABLE+" if DEBUG_MODE
140
+ when :mw_pre
141
+ next unless config[:pre]
142
+
143
+ line = e.last
144
+ line << "+PRE+" if DEBUG_MODE
145
+ when :mw_quote
146
+ line = e.last
147
+ line << "+QUOTE+" if DEBUG_MODE
148
+ when :mw_unordered, :mw_ordered, :mw_definition
149
+ next unless config[:list]
150
+
151
+ line = e.last
152
+ line << "+LIST+" if DEBUG_MODE
153
+ when :mw_ml_template
154
+ next unless config[:multiline]
155
+
156
+ line = e.last
157
+ line << "+MLTEMPLATE+" if DEBUG_MODE
158
+ when :mw_redirect
159
+ next unless config[:redirect]
160
+
161
+ line = e.last
162
+ line << "+REDIRECT+" if DEBUG_MODE
163
+ line << "\n\n"
164
+ when :mw_isolated_template
165
+ next unless config[:multiline]
166
+
167
+ line = e.last
168
+ line << "+ISOLATED_TEMPLATE+" if DEBUG_MODE
169
+ when :mw_isolated_tag
170
+ next
171
+ else
172
+ next unless DEBUG_MODE
173
+
174
+ line = e.last
175
+ line << "+OTHER+"
176
+ end
177
+ contents << line << "\n"
182
178
  end
183
179
  end
184
- contents << line << "\n"
185
- end
186
- end
187
180
 
188
- if /\A[\s ]*\z/m =~ contents
189
- result = ""
190
- else
191
- result = config[:title] ? title << contents : contents
181
+ if /\A[\s ]*\z/m =~ contents
182
+ ""
183
+ else
184
+ config[:title] ? title << contents : contents
185
+ end
186
+ end
192
187
  end
188
+ puts pastel.blue.bold("Complete!")
193
189
  end
194
190
  end
195
191
 
196
- puts pastel.blue.bold("Complete!")
197
-
192
+ WpApp.new.run