wp2txt 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/wp2txt CHANGED
@@ -1,197 +1,192 @@
1
1
  #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- $: << File.join(File.dirname(__FILE__))
5
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
6
-
7
- $DEBUG_MODE = false
8
- SHAREDIR = File.join(File.dirname(__FILE__), '..', 'share')
9
- DOCDIR = File.join(File.dirname(__FILE__), '..', 'doc')
10
-
11
- require 'wp2txt'
12
- require 'wp2txt/utils'
13
- require 'wp2txt/version'
14
- require 'etc'
15
- require 'optimist'
16
- require 'parallel'
17
- require 'pastel'
18
- require 'tty-spinner'
19
-
20
- include Wp2txt
21
-
22
- opts = Optimist::options do
23
- version Wp2txt::VERSION
24
- banner <<-EOS
25
- WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
26
-
27
- Usage: wp2txt [options]
28
- where [options] are:
29
- EOS
30
-
31
- opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", :required => true, :short => "-i"
32
- opt :output_dir, "Path to output directory", :default => Dir::pwd, :type => String, :short => "-o"
33
- opt :convert, "Output in plain text (converting from XML)", :default => true, :short => "-c"
34
- opt :category, "Show article category information", :default => true, :short => "-a"
35
- opt :category_only, "Extract only article title and categories", :default => false, :short => "-g"
36
- opt :summary_only, "Extract only article title, categories, and summary text before first heading", :default => false, :short => "-s"
37
- opt :file_size, "Approximate size (in MB) of each output file", :default => 10, :short => "-f"
38
- opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", :short => "-n"
39
- opt :del_interfile, "Delete intermediate XML files from output dir", :short => "-x", :default => false
40
- opt :title, "Keep page titles in output", :default => true, :short => "-t"
41
- opt :heading, "Keep section titles in output", :default => true, :short => "-d"
42
- opt :list, "Keep unprocessed list items in output", :default => false, :short => "-l"
43
- opt :ref, "Keep reference notations in the format [ref]...[/ref]", :default => false, :short => "-r"
44
- opt :redirect, "Show redirect destination", :default => false, :short => "-e"
45
- opt :marker, "Show symbols prefixed to list items, definitions, etc.", :default => true, :short => "-m"
46
- opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", :default => false, :short => "-b"
47
- end
48
-
49
- Optimist::die :size, "must be larger than 0" unless opts[:file_size] >= 0
50
- Optimist::die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
51
-
52
- pastel = Pastel.new
53
2
 
54
- input_file = ARGV[0]
55
- output_dir = opts[:output_dir]
56
- tfile_size = opts[:file_size]
57
- num_processors = Etc.nprocessors
58
- if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
59
- num_processes = opts[:num_procs]
60
- else
61
- num_processes = num_processors - 2
62
- end
63
- num_processes = 1 if num_processes < 1
64
-
65
- convert = opts[:convert]
66
- strip_tmarker = opts[:marker] ? false : true
67
- opt_array = [:title,
68
- :list,
69
- :heading,
70
- :table,
71
- :redirect,
72
- :multiline,
73
- :category,
74
- :category_only,
75
- :summary_only,
76
- :del_interfile,
77
- :bz2_gem ]
78
-
79
- $leave_inline_template = true if opts[:inline]
80
- $leave_ref = true if opts[:ref]
81
-
82
- config = {}
83
- opt_array.each do |opt|
84
- config[opt] = opts[opt]
85
- end
3
+ # frozen_string_literal: true
4
+
5
+ DEBUG_MODE = false
6
+ SHAREDIR = File.join(File.dirname(__FILE__), "..", "share")
7
+ DOCDIR = File.join(File.dirname(__FILE__), "..", "doc")
8
+
9
+ require_relative "../lib/wp2txt"
10
+ require_relative "../lib/wp2txt/utils"
11
+ require_relative "../lib/wp2txt/version"
12
+
13
+ require "etc"
14
+ require "optimist"
15
+ require "parallel"
16
+ require "pastel"
17
+ require "tty-spinner"
18
+
19
+ class WpApp
20
+ include Wp2txt
21
+
22
+ def run
23
+ opts = Optimist.options do
24
+ version VERSION
25
+ banner <<~BANNER
26
+ WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
27
+
28
+ Usage: wp2txt [options]
29
+ where [options] are:
30
+ BANNER
31
+
32
+ opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", type: String, required: true, short: "-i"
33
+ opt :output_dir, "Path to output directory", default: Dir.pwd, type: String, short: "-o"
34
+ opt :convert, "Output in plain text (converting from XML)", default: true, short: "-c"
35
+ opt :category, "Show article category information", default: true, short: "-a"
36
+ opt :category_only, "Extract only article title and categories", default: false, short: "-g"
37
+ opt :summary_only, "Extract only article title, categories, and summary text before first heading", default: false, short: "-s"
38
+ opt :file_size, "Approximate size (in MB) of each output file", default: 10, short: "-f"
39
+ opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", short: "-n"
40
+ opt :del_interfile, "Delete intermediate XML files from output dir", short: "-x", default: false
41
+ opt :title, "Keep page titles in output", default: true, short: "-t"
42
+ opt :heading, "Keep section titles in output", default: true, short: "-d"
43
+ opt :list, "Keep unprocessed list items in output", default: false, short: "-l"
44
+ opt :ref, "Keep reference notations in the format [ref]...[/ref]", default: false, short: "-r"
45
+ opt :redirect, "Show redirect destination", default: false, short: "-e"
46
+ opt :marker, "Show symbols prefixed to list items, definitions, etc.", default: true, short: "-m"
47
+ opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", default: false, short: "-b"
48
+ end
86
49
 
87
- if File::ftype(input_file) == "directory"
88
- input_files = Dir.glob("#{input_file}/*.xml")
89
- else
90
- puts ""
91
- puts pastel.green.bold("Preprocessing")
92
- puts "Decompressing and splitting the original dump file."
93
- puts pastel.underline("This may take a while. Please be patient!")
94
-
95
- time_start = Time.now.to_i
96
- wpsplitter = Wp2txt::Splitter.new(input_file, output_dir, tfile_size)
97
- spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
98
- spinner.auto_spin
99
- wpsplitter.split_file
100
- time_finish = Time.now.to_i
101
-
102
- spinner.stop("Time: #{sec_to_str(time_finish - time_start)}")# Stop animation
103
- puts pastel.blue.bold("Complete!")
104
- exit if !convert
105
- input_files = Dir.glob("#{output_dir}/*.xml")
106
- end
50
+ Optimist.die :size, "must be larger than 0" unless opts[:file_size] >= 0
51
+ Optimist.die :input, "must exist" unless File.exist?(opts[:input])
52
+ Optimist.die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
53
+
54
+ pastel = Pastel.new
55
+
56
+ input_file = opts[:input]
57
+ output_dir = opts[:output_dir]
58
+ tfile_size = opts[:file_size]
59
+ num_processors = Etc.nprocessors
60
+ num_processes = if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
61
+ opts[:num_procs]
62
+ else
63
+ num_processors - 2
64
+ end
65
+ num_processes = 1 if num_processes < 1
66
+
67
+ convert = opts[:convert]
68
+ strip_tmarker = opts[:marker] ? false : true
69
+ opt_array = %i[title list heading table redirect multiline category category_only summary_only del_interfile bz2_gem]
70
+
71
+ config = {}
72
+ opt_array.each do |opt|
73
+ config[opt] = opts[opt]
74
+ end
107
75
 
108
- puts ""
109
- puts pastel.red.bold("Converting")
110
- puts "Number of files being processed: " + pastel.bold("#{input_files.size}")
111
- puts "Number of CPU cores being used: " + pastel.bold("#{num_processes}")
112
-
113
- Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |input_file|
114
- wpconv = Wp2txt::Runner.new(input_file, output_dir, strip_tmarker, config[:del_interfile])
115
- wpconv.extract_text do |article|
116
- format_wiki!(article.title)
117
-
118
- if config[:category_only]
119
- title = "#{article.title}\t"
120
- contents = article.categories.join(", ")
121
- contents << "\n"
122
- elsif config[:category] && !article.categories.empty?
123
- title = "\n[[#{article.title}]]\n\n"
124
- contents = "\nCATEGORIES: "
125
- contents << article.categories.join(", ")
126
- contents << "\n\n"
76
+ if File.ftype(input_file) == "directory"
77
+ input_files = Dir.glob("#{input_file}/*.xml")
127
78
  else
128
- title = "\n[[#{article.title}]]\n\n"
129
- contents = ""
79
+ puts ""
80
+ puts pastel.green.bold("Preprocessing")
81
+ puts "Decompressing and splitting the original dump file."
82
+ puts pastel.underline("This may take a while. Please be patient!")
83
+
84
+ time_start = Time.now.to_i
85
+ wpsplitter = Splitter.new(input_file, output_dir, tfile_size)
86
+ spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
87
+ spinner.auto_spin
88
+ wpsplitter.split_file
89
+ time_finish = Time.now.to_i
90
+
91
+ spinner.stop("Time: #{sec_to_str(time_finish - time_start)}") # Stop animation
92
+ puts pastel.blue.bold("Complete!")
93
+ exit unless convert
94
+ input_files = Dir.glob("#{output_dir}/*.xml")
130
95
  end
131
96
 
132
- unless config[:category_only]
133
- article.elements.each do |e|
134
- case e.first
135
- when :mw_heading
136
- break if config[:summary_only]
137
- next if !config[:heading]
138
- format_wiki!(e.last)
139
- line = e.last
140
- line << "+HEADING+" if $DEBUG_MODE
141
- when :mw_paragraph
142
- format_wiki!(e.last)
143
- line = e.last + "\n"
144
- line << "+PARAGRAPH+" if $DEBUG_MODE
145
- when :mw_table, :mw_htable
146
- next if !config[:table]
147
- line = e.last
148
- line << "+TABLE+" if $DEBUG_MODE
149
- when :mw_pre
150
- next if !config[:pre]
151
- line = e.last
152
- line << "+PRE+" if $DEBUG_MODE
153
- when :mw_quote
154
- line = e.last
155
- line << "+QUOTE+" if $DEBUG_MODE
156
- when :mw_unordered, :mw_ordered, :mw_definition
157
- next if !config[:list]
158
- line = e.last
159
- line << "+LIST+" if $DEBUG_MODE
160
- when :mw_ml_template
161
- next if !config[:multiline]
162
- line = e.last
163
- line << "+MLTEMPLATE+" if $DEBUG_MODE
164
- when :mw_redirect
165
- next if !config[:redirect]
166
- line = e.last
167
- line << "+REDIRECT+" if $DEBUG_MODE
168
- line << "\n\n"
169
- when :mw_isolated_template
170
- next if !config[:multiline]
171
- line = e.last
172
- line << "+ISOLATED_TEMPLATE+" if $DEBUG_MODE
173
- when :mw_isolated_tag
174
- next
97
+ puts ""
98
+ puts pastel.red.bold("Converting")
99
+ puts "Number of files being processed: " + pastel.bold(input_files.size.to_s)
100
+ puts "Number of CPU cores being used: " + pastel.bold(num_processes.to_s)
101
+
102
+ Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |infile|
103
+ wpconv = Runner.new(infile, output_dir, strip_tmarker, config[:del_interfile])
104
+ wpconv.extract_text do |article|
105
+ article.title = format_wiki(article.title, config)
106
+
107
+ if config[:category_only]
108
+ title = "#{article.title}\t"
109
+ contents = article.categories.join(", ")
110
+ contents << "\n"
111
+ elsif config[:category] && !article.categories.empty?
112
+ title = "\n[[#{article.title}]]\n\n"
113
+ contents = +"\nCATEGORIES: "
114
+ contents << article.categories.join(", ")
115
+ contents << "\n\n"
175
116
  else
176
- if $DEBUG_MODE
177
- # format_wiki!(e.last)
178
- line = e.last
179
- line << "+OTHER+"
180
- else
181
- next
117
+ title = "\n[[#{article.title}]]\n\n"
118
+ contents = +""
119
+ end
120
+
121
+ unless config[:category_only]
122
+ article.elements.each do |e|
123
+ case e.first
124
+ when :mw_heading
125
+ break if config[:summary_only]
126
+ next unless config[:heading]
127
+
128
+ e[-1] = format_wiki(e.last, config)
129
+ line = e.last
130
+ line << "+HEADING+" if DEBUG_MODE
131
+ when :mw_paragraph
132
+ e[-1] = format_wiki(e.last, config)
133
+ line = e.last + "\n"
134
+ line << "+PARAGRAPH+" if DEBUG_MODE
135
+ when :mw_table, :mw_htable
136
+ next unless config[:table]
137
+
138
+ line = e.last
139
+ line << "+TABLE+" if DEBUG_MODE
140
+ when :mw_pre
141
+ next unless config[:pre]
142
+
143
+ line = e.last
144
+ line << "+PRE+" if DEBUG_MODE
145
+ when :mw_quote
146
+ line = e.last
147
+ line << "+QUOTE+" if DEBUG_MODE
148
+ when :mw_unordered, :mw_ordered, :mw_definition
149
+ next unless config[:list]
150
+
151
+ line = e.last
152
+ line << "+LIST+" if DEBUG_MODE
153
+ when :mw_ml_template
154
+ next unless config[:multiline]
155
+
156
+ line = e.last
157
+ line << "+MLTEMPLATE+" if DEBUG_MODE
158
+ when :mw_redirect
159
+ next unless config[:redirect]
160
+
161
+ line = e.last
162
+ line << "+REDIRECT+" if DEBUG_MODE
163
+ line << "\n\n"
164
+ when :mw_isolated_template
165
+ next unless config[:multiline]
166
+
167
+ line = e.last
168
+ line << "+ISOLATED_TEMPLATE+" if DEBUG_MODE
169
+ when :mw_isolated_tag
170
+ next
171
+ else
172
+ next unless DEBUG_MODE
173
+
174
+ line = e.last
175
+ line << "+OTHER+"
176
+ end
177
+ contents << line << "\n"
182
178
  end
183
179
  end
184
- contents << line << "\n"
185
- end
186
- end
187
180
 
188
- if /\A[\s ]*\z/m =~ contents
189
- result = ""
190
- else
191
- result = config[:title] ? title << contents : contents
181
+ if /\A[\s ]*\z/m =~ contents
182
+ ""
183
+ else
184
+ config[:title] ? title << contents : contents
185
+ end
186
+ end
192
187
  end
188
+ puts pastel.blue.bold("Complete!")
193
189
  end
194
190
  end
195
191
 
196
- puts pastel.blue.bold("Complete!")
197
-
192
+ WpApp.new.run
@@ -1,62 +1,54 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- $: << File.join(File.dirname(__FILE__))
5
-
1
+ # frozen_string_literal: true
6
2
 
7
3
  require 'strscan'
8
- require 'utils'
4
+ require_relative 'utils'
9
5
 
10
6
  module Wp2txt
11
-
12
7
  # possible element type, which could be later chosen to print or not to print
13
- # :mw_heading
14
- # :mw_htable
15
- # :mw_quote
16
- # :mw_unordered
17
- # :mw_ordered
18
- # :mw_definition
19
- # :mw_pre
20
- # :mw_paragraph
21
- # :mw_comment
22
- # :mw_math
23
- # :mw_source
24
- # :mw_inputbox
25
- # :mw_template
26
- # :mw_link
27
- # :mw_summary
28
- # :mw_blank
29
- # :mw_redirect
8
+ # :mw_heading
9
+ # :mw_htable
10
+ # :mw_quote
11
+ # :mw_unordered
12
+ # :mw_ordered
13
+ # :mw_definition
14
+ # :mw_pre
15
+ # :mw_paragraph
16
+ # :mw_comment
17
+ # :mw_math
18
+ # :mw_source
19
+ # :mw_inputbox
20
+ # :mw_template
21
+ # :mw_link
22
+ # :mw_summary
23
+ # :mw_blank
24
+ # :mw_redirect
30
25
 
31
26
  # an article contains elements, each of which is [TYPE, string]
32
27
  class Article
33
-
34
28
  include Wp2txt
35
29
  attr_accessor :elements, :title, :categories
36
-
30
+
37
31
  def initialize(text, title = "", strip_tmarker = false)
38
32
  @title = title.strip
39
33
  @strip_tmarker = strip_tmarker
40
- convert_characters!(text)
41
- text.gsub!(/\|\n\n+/m){"|\n"}
42
- remove_html!(text)
43
- make_reference!(text)
44
- remove_ref!(text)
34
+ text = convert_characters(text)
35
+ text = text.gsub(/\|\n\n+/m) { "|\n" }
36
+ text = remove_html(text)
37
+ text = make_reference(text)
38
+ text = remove_ref(text)
45
39
  parse text
46
40
  end
47
-
48
- def create_element(tp, text)
49
- [tp, text]
41
+
42
+ def create_element(tpx, text)
43
+ [tpx, text]
50
44
  end
51
-
45
+
52
46
  def parse(source)
53
47
  @elements = []
54
- @categories = []
48
+ @categories = []
55
49
  mode = nil
56
- open_stack = []
57
- close_stack = []
58
50
  source.each_line do |line|
59
- matched = line.scan($category_regex)
51
+ matched = line.scan(CATEGORY_REGEX)
60
52
  if matched && !matched.empty?
61
53
  @categories += matched
62
54
  @categories.uniq!
@@ -65,108 +57,94 @@ module Wp2txt
65
57
  case mode
66
58
  when :mw_ml_template
67
59
  scanner = StringScanner.new(line)
68
- str= process_nested_structure(scanner, "{{", "}}") {""}
69
- if $ml_template_end_regex =~ str
70
- mode = nil
71
- end
60
+ str = process_nested_structure(scanner, "{{", "}}") { "" }
61
+ mode = nil if ML_TEMPLATE_END_REGEX =~ str
72
62
  @elements.last.last << line
73
63
  next
74
64
  when :mw_ml_link
75
65
  scanner = StringScanner.new(line)
76
- str= process_nested_structure(scanner, "[[", "]]") {""}
77
- if $ml_link_end_regex =~ str
78
- mode = nil
79
- end
66
+ str = process_nested_structure(scanner, "[[", "]]") { "" }
67
+ mode = nil if ML_LINK_END_REGEX =~ str
80
68
  @elements.last.last << line
81
69
  next
82
70
  when :mw_table
83
- if $in_table_regex2 =~ line
84
- mode = nil
85
- end
71
+ mode = nil if IN_TABLE_REGEX2 =~ line
86
72
  @elements.last.last << line
87
- next
73
+ next
88
74
  when :mw_inputbox
89
- if $in_inputbox_regex2 =~ line
90
- mode = nil
91
- end
75
+ mode = nil if IN_INPUTBOX_REGEX2 =~ line
92
76
  @elements.last.last << line
93
77
  next
94
78
  when :mw_source
95
- if $in_source_regex2 =~ line
96
- mode = nil
97
- end
79
+ mode = nil if IN_SOURCE_REGEX2 =~ line
98
80
  @elements.last.last << line
99
81
  next
100
82
  when :mw_math
101
- if $in_math_regex2 =~ line
102
- mode = nil
103
- end
83
+ mode = nil if IN_MATH_REGEX2 =~ line
104
84
  @elements.last.last << line
105
85
  next
106
86
  when :mw_htable
107
- if $in_html_table_regex2 =~ line
108
- mode = nil
109
- end
87
+ mode = nil if IN_HTML_TABLE_REGEX2 =~ line
110
88
  @elements.last.last << line
111
89
  next
112
90
  end
113
91
 
114
92
  case line
115
- when $isolated_template_regex
93
+ when ISOLATED_TEMPLATE_REGEX
116
94
  @elements << create_element(:mw_isolated_template, line)
117
- when $isolated_tag_regex
95
+ when ISOLATED_TAG_REGEX
118
96
  @elements << create_element(:mw_isolated_tag, line)
119
- when $blank_line_regex
120
- @elements << create_element(:mw_blank, "\n")
121
- when $redirect_regex
97
+ when BLANK_LINE_REGEX
98
+ @elements << create_element(:mw_blank, "\n")
99
+ when REDIRECT_REGEX
122
100
  @elements << create_element(:mw_redirect, line)
123
- when $in_heading_regex
124
- line = line.sub($heading_onset_regex){$1}.sub($heading_coda_regex){$1}
101
+ when IN_HEADING_REGEX
102
+ line = line.sub(HEADING_ONSET_REGEX) { $1 }.sub(HEADING_CODA_REGEX) { $1 }
125
103
  @elements << create_element(:mw_heading, "\n" + line + "\n")
126
- when $in_inputbox_regex
104
+ when IN_INPUTBOX_REGEX
127
105
  @elements << create_element(:mw_inputbox, line)
128
- when $ml_template_onset_regex
106
+ when ML_TEMPLATE_ONSET_REGEX
129
107
  @elements << create_element(:mw_ml_template, line)
130
108
  mode = :mw_ml_template
131
- when $ml_link_onset_regex
109
+ when ML_LINK_ONSET_REGEX
132
110
  @elements << create_element(:mw_ml_link, line)
133
111
  mode = :mw_ml_link
134
- when $in_inputbox_regex1
112
+ when IN_INPUTBOX_REGEX1
135
113
  mode = :mw_inputbox
136
114
  @elements << create_element(:mw_inputbox, line)
137
- when $in_source_regex
138
- @elements << create_element(:mw_source, line)
139
- when $in_source_regex1
115
+ when IN_SOURCE_REGEX
116
+ @elements << create_element(:mw_source, line)
117
+ when IN_SOURCE_REGEX1
140
118
  mode = :mw_source
141
119
  @elements << create_element(:mw_source, line)
142
- when $in_math_regex
120
+ when IN_MATH_REGEX
143
121
  @elements << create_element(:mw_math, line)
144
- when $in_math_regex1
122
+ when IN_MATH_REGEX1
145
123
  mode = :mw_math
146
124
  @elements << create_element(:mw_math, line)
147
- when $in_html_table_regex
125
+ when IN_HTML_TABLE_REGEX
148
126
  @elements << create_element(:mw_htable, line)
149
- when $in_html_table_regex1
127
+ when IN_HTML_TABLE_REGEX1
150
128
  mode = :mw_htable
151
129
  @elements << create_element(:mw_htable, line)
152
- when $in_table_regex1
130
+ when IN_TABLE_REGEX1
153
131
  mode = :mw_table
154
132
  @elements << create_element(:mw_table, line)
155
- when $in_unordered_regex
156
- line = line.sub($list_marks_regex, "") if @strip_tmarker
133
+ when IN_UNORDERED_REGEX
134
+ line = line.sub(LIST_MARKS_REGEX, "") if @strip_tmarker
157
135
  @elements << create_element(:mw_unordered, line)
158
- when $in_ordered_regex
159
- line = line.sub($list_marks_regex, "") if @strip_tmarker
136
+ when IN_ORDERED_REGEX
137
+ line = line.sub(LIST_MARKS_REGEX, "") if @strip_tmarker
160
138
  @elements << create_element(:mw_ordered, line)
161
- when $in_pre_regex
162
- line = line.sub($pre_marks_regex, "") if @strip_tmarker
139
+ when IN_PRE_REGEX
140
+ line = line.sub(PRE_MARKS_REGEX, "") if @strip_tmarker
163
141
  @elements << create_element(:mw_pre, line)
164
- when $in_definition_regex
165
- line = line.sub($def_marks_regex, "") if @strip_tmarker
142
+ when IN_DEFINITION_REGEX
143
+ line = line.sub(DEF_MARKS_REGEX, "") if @strip_tmarker
166
144
  @elements << create_element(:mw_definition, line)
167
- when $in_link_regex
145
+ when IN_LINK_REGEX
168
146
  @elements << create_element(:mw_link, line)
169
- else
147
+ else
170
148
  @elements << create_element(:mw_paragraph, "\n" + line)
171
149
  end
172
150
  end