wp2txt 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/wp2txt CHANGED
@@ -1,197 +1,192 @@
1
1
  #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- $: << File.join(File.dirname(__FILE__))
5
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
6
-
7
- $DEBUG_MODE = false
8
- SHAREDIR = File.join(File.dirname(__FILE__), '..', 'share')
9
- DOCDIR = File.join(File.dirname(__FILE__), '..', 'doc')
10
-
11
- require 'wp2txt'
12
- require 'wp2txt/utils'
13
- require 'wp2txt/version'
14
- require 'etc'
15
- require 'optimist'
16
- require 'parallel'
17
- require 'pastel'
18
- require 'tty-spinner'
19
-
20
- include Wp2txt
21
-
22
- opts = Optimist::options do
23
- version Wp2txt::VERSION
24
- banner <<-EOS
25
- WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
26
-
27
- Usage: wp2txt [options]
28
- where [options] are:
29
- EOS
30
-
31
- opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", :required => true, :short => "-i"
32
- opt :output_dir, "Path to output directory", :default => Dir::pwd, :type => String, :short => "-o"
33
- opt :convert, "Output in plain text (converting from XML)", :default => true, :short => "-c"
34
- opt :category, "Show article category information", :default => true, :short => "-a"
35
- opt :category_only, "Extract only article title and categories", :default => false, :short => "-g"
36
- opt :summary_only, "Extract only article title, categories, and summary text before first heading", :default => false, :short => "-s"
37
- opt :file_size, "Approximate size (in MB) of each output file", :default => 10, :short => "-f"
38
- opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", :short => "-n"
39
- opt :del_interfile, "Delete intermediate XML files from output dir", :short => "-x", :default => false
40
- opt :title, "Keep page titles in output", :default => true, :short => "-t"
41
- opt :heading, "Keep section titles in output", :default => true, :short => "-d"
42
- opt :list, "Keep unprocessed list items in output", :default => false, :short => "-l"
43
- opt :ref, "Keep reference notations in the format [ref]...[/ref]", :default => false, :short => "-r"
44
- opt :redirect, "Show redirect destination", :default => false, :short => "-e"
45
- opt :marker, "Show symbols prefixed to list items, definitions, etc.", :default => true, :short => "-m"
46
- opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", :default => false, :short => "-b"
47
- end
48
-
49
- Optimist::die :size, "must be larger than 0" unless opts[:file_size] >= 0
50
- Optimist::die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
51
-
52
- pastel = Pastel.new
53
2
 
54
- input_file = ARGV[0]
55
- output_dir = opts[:output_dir]
56
- tfile_size = opts[:file_size]
57
- num_processors = Etc.nprocessors
58
- if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
59
- num_processes = opts[:num_procs]
60
- else
61
- num_processes = num_processors - 2
62
- end
63
- num_processes = 1 if num_processes < 1
64
-
65
- convert = opts[:convert]
66
- strip_tmarker = opts[:marker] ? false : true
67
- opt_array = [:title,
68
- :list,
69
- :heading,
70
- :table,
71
- :redirect,
72
- :multiline,
73
- :category,
74
- :category_only,
75
- :summary_only,
76
- :del_interfile,
77
- :bz2_gem ]
78
-
79
- $leave_inline_template = true if opts[:inline]
80
- $leave_ref = true if opts[:ref]
81
-
82
- config = {}
83
- opt_array.each do |opt|
84
- config[opt] = opts[opt]
85
- end
3
+ # frozen_string_literal: true
4
+
5
+ DEBUG_MODE = false
6
+ SHAREDIR = File.join(File.dirname(__FILE__), "..", "share")
7
+ DOCDIR = File.join(File.dirname(__FILE__), "..", "doc")
8
+
9
+ require_relative "../lib/wp2txt"
10
+ require_relative "../lib/wp2txt/utils"
11
+ require_relative "../lib/wp2txt/version"
12
+
13
+ require "etc"
14
+ require "optimist"
15
+ require "parallel"
16
+ require "pastel"
17
+ require "tty-spinner"
18
+
19
+ class WpApp
20
+ include Wp2txt
21
+
22
+ def run
23
+ opts = Optimist.options do
24
+ version VERSION
25
+ banner <<~BANNER
26
+ WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.
27
+
28
+ Usage: wp2txt [options]
29
+ where [options] are:
30
+ BANNER
31
+
32
+ opt :input, "Path to compressed file (bz2) or decompressed file (xml), or path to directory containing files of the latter format", type: String, required: true, short: "-i"
33
+ opt :output_dir, "Path to output directory", default: Dir.pwd, type: String, short: "-o"
34
+ opt :convert, "Output in plain text (converting from XML)", default: true, short: "-c"
35
+ opt :category, "Show article category information", default: true, short: "-a"
36
+ opt :category_only, "Extract only article title and categories", default: false, short: "-g"
37
+ opt :summary_only, "Extract only article title, categories, and summary text before first heading", default: false, short: "-s"
38
+ opt :file_size, "Approximate size (in MB) of each output file", default: 10, short: "-f"
39
+ opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", short: "-n"
40
+ opt :del_interfile, "Delete intermediate XML files from output dir", short: "-x", default: false
41
+ opt :title, "Keep page titles in output", default: true, short: "-t"
42
+ opt :heading, "Keep section titles in output", default: true, short: "-d"
43
+ opt :list, "Keep unprocessed list items in output", default: false, short: "-l"
44
+ opt :ref, "Keep reference notations in the format [ref]...[/ref]", default: false, short: "-r"
45
+ opt :redirect, "Show redirect destination", default: false, short: "-e"
46
+ opt :marker, "Show symbols prefixed to list items, definitions, etc.", default: true, short: "-m"
47
+ opt :bz2_gem, "Use Ruby's bzip2-ruby gem instead of a system command", default: false, short: "-b"
48
+ end
86
49
 
87
- if File::ftype(input_file) == "directory"
88
- input_files = Dir.glob("#{input_file}/*.xml")
89
- else
90
- puts ""
91
- puts pastel.green.bold("Preprocessing")
92
- puts "Decompressing and splitting the original dump file."
93
- puts pastel.underline("This may take a while. Please be patient!")
94
-
95
- time_start = Time.now.to_i
96
- wpsplitter = Wp2txt::Splitter.new(input_file, output_dir, tfile_size)
97
- spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
98
- spinner.auto_spin
99
- wpsplitter.split_file
100
- time_finish = Time.now.to_i
101
-
102
- spinner.stop("Time: #{sec_to_str(time_finish - time_start)}")# Stop animation
103
- puts pastel.blue.bold("Complete!")
104
- exit if !convert
105
- input_files = Dir.glob("#{output_dir}/*.xml")
106
- end
50
+ Optimist.die :size, "must be larger than 0" unless opts[:file_size] >= 0
51
+ Optimist.die :input, "must exist" unless File.exist?(opts[:input])
52
+ Optimist.die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
53
+
54
+ pastel = Pastel.new
55
+
56
+ input_file = opts[:input]
57
+ output_dir = opts[:output_dir]
58
+ tfile_size = opts[:file_size]
59
+ num_processors = Etc.nprocessors
60
+ num_processes = if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
61
+ opts[:num_procs]
62
+ else
63
+ num_processors - 2
64
+ end
65
+ num_processes = 1 if num_processes < 1
66
+
67
+ convert = opts[:convert]
68
+ strip_tmarker = opts[:marker] ? false : true
69
+ opt_array = %i[title list heading table redirect multiline category category_only summary_only del_interfile bz2_gem]
70
+
71
+ config = {}
72
+ opt_array.each do |opt|
73
+ config[opt] = opts[opt]
74
+ end
107
75
 
108
- puts ""
109
- puts pastel.red.bold("Converting")
110
- puts "Number of files being processed: " + pastel.bold("#{input_files.size}")
111
- puts "Number of CPU cores being used: " + pastel.bold("#{num_processes}")
112
-
113
- Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |input_file|
114
- wpconv = Wp2txt::Runner.new(input_file, output_dir, strip_tmarker, config[:del_interfile])
115
- wpconv.extract_text do |article|
116
- format_wiki!(article.title)
117
-
118
- if config[:category_only]
119
- title = "#{article.title}\t"
120
- contents = article.categories.join(", ")
121
- contents << "\n"
122
- elsif config[:category] && !article.categories.empty?
123
- title = "\n[[#{article.title}]]\n\n"
124
- contents = "\nCATEGORIES: "
125
- contents << article.categories.join(", ")
126
- contents << "\n\n"
76
+ if File.ftype(input_file) == "directory"
77
+ input_files = Dir.glob("#{input_file}/*.xml")
127
78
  else
128
- title = "\n[[#{article.title}]]\n\n"
129
- contents = ""
79
+ puts ""
80
+ puts pastel.green.bold("Preprocessing")
81
+ puts "Decompressing and splitting the original dump file."
82
+ puts pastel.underline("This may take a while. Please be patient!")
83
+
84
+ time_start = Time.now.to_i
85
+ wpsplitter = Splitter.new(input_file, output_dir, tfile_size)
86
+ spinner = TTY::Spinner.new(":spinner", format: :arrow_pulse, hide_cursor: true, interval: 5)
87
+ spinner.auto_spin
88
+ wpsplitter.split_file
89
+ time_finish = Time.now.to_i
90
+
91
+ spinner.stop("Time: #{sec_to_str(time_finish - time_start)}") # Stop animation
92
+ puts pastel.blue.bold("Complete!")
93
+ exit unless convert
94
+ input_files = Dir.glob("#{output_dir}/*.xml")
130
95
  end
131
96
 
132
- unless config[:category_only]
133
- article.elements.each do |e|
134
- case e.first
135
- when :mw_heading
136
- break if config[:summary_only]
137
- next if !config[:heading]
138
- format_wiki!(e.last)
139
- line = e.last
140
- line << "+HEADING+" if $DEBUG_MODE
141
- when :mw_paragraph
142
- format_wiki!(e.last)
143
- line = e.last + "\n"
144
- line << "+PARAGRAPH+" if $DEBUG_MODE
145
- when :mw_table, :mw_htable
146
- next if !config[:table]
147
- line = e.last
148
- line << "+TABLE+" if $DEBUG_MODE
149
- when :mw_pre
150
- next if !config[:pre]
151
- line = e.last
152
- line << "+PRE+" if $DEBUG_MODE
153
- when :mw_quote
154
- line = e.last
155
- line << "+QUOTE+" if $DEBUG_MODE
156
- when :mw_unordered, :mw_ordered, :mw_definition
157
- next if !config[:list]
158
- line = e.last
159
- line << "+LIST+" if $DEBUG_MODE
160
- when :mw_ml_template
161
- next if !config[:multiline]
162
- line = e.last
163
- line << "+MLTEMPLATE+" if $DEBUG_MODE
164
- when :mw_redirect
165
- next if !config[:redirect]
166
- line = e.last
167
- line << "+REDIRECT+" if $DEBUG_MODE
168
- line << "\n\n"
169
- when :mw_isolated_template
170
- next if !config[:multiline]
171
- line = e.last
172
- line << "+ISOLATED_TEMPLATE+" if $DEBUG_MODE
173
- when :mw_isolated_tag
174
- next
97
+ puts ""
98
+ puts pastel.red.bold("Converting")
99
+ puts "Number of files being processed: " + pastel.bold(input_files.size.to_s)
100
+ puts "Number of CPU cores being used: " + pastel.bold(num_processes.to_s)
101
+
102
+ Parallel.map(input_files, progress: pastel.magenta.bold("WP2TXT"), in_processes: num_processes) do |infile|
103
+ wpconv = Runner.new(infile, output_dir, strip_tmarker, config[:del_interfile])
104
+ wpconv.extract_text do |article|
105
+ article.title = format_wiki(article.title, config)
106
+
107
+ if config[:category_only]
108
+ title = "#{article.title}\t"
109
+ contents = article.categories.join(", ")
110
+ contents << "\n"
111
+ elsif config[:category] && !article.categories.empty?
112
+ title = "\n[[#{article.title}]]\n\n"
113
+ contents = +"\nCATEGORIES: "
114
+ contents << article.categories.join(", ")
115
+ contents << "\n\n"
175
116
  else
176
- if $DEBUG_MODE
177
- # format_wiki!(e.last)
178
- line = e.last
179
- line << "+OTHER+"
180
- else
181
- next
117
+ title = "\n[[#{article.title}]]\n\n"
118
+ contents = +""
119
+ end
120
+
121
+ unless config[:category_only]
122
+ article.elements.each do |e|
123
+ case e.first
124
+ when :mw_heading
125
+ break if config[:summary_only]
126
+ next unless config[:heading]
127
+
128
+ e[-1] = format_wiki(e.last, config)
129
+ line = e.last
130
+ line << "+HEADING+" if DEBUG_MODE
131
+ when :mw_paragraph
132
+ e[-1] = format_wiki(e.last, config)
133
+ line = e.last + "\n"
134
+ line << "+PARAGRAPH+" if DEBUG_MODE
135
+ when :mw_table, :mw_htable
136
+ next unless config[:table]
137
+
138
+ line = e.last
139
+ line << "+TABLE+" if DEBUG_MODE
140
+ when :mw_pre
141
+ next unless config[:pre]
142
+
143
+ line = e.last
144
+ line << "+PRE+" if DEBUG_MODE
145
+ when :mw_quote
146
+ line = e.last
147
+ line << "+QUOTE+" if DEBUG_MODE
148
+ when :mw_unordered, :mw_ordered, :mw_definition
149
+ next unless config[:list]
150
+
151
+ line = e.last
152
+ line << "+LIST+" if DEBUG_MODE
153
+ when :mw_ml_template
154
+ next unless config[:multiline]
155
+
156
+ line = e.last
157
+ line << "+MLTEMPLATE+" if DEBUG_MODE
158
+ when :mw_redirect
159
+ next unless config[:redirect]
160
+
161
+ line = e.last
162
+ line << "+REDIRECT+" if DEBUG_MODE
163
+ line << "\n\n"
164
+ when :mw_isolated_template
165
+ next unless config[:multiline]
166
+
167
+ line = e.last
168
+ line << "+ISOLATED_TEMPLATE+" if DEBUG_MODE
169
+ when :mw_isolated_tag
170
+ next
171
+ else
172
+ next unless DEBUG_MODE
173
+
174
+ line = e.last
175
+ line << "+OTHER+"
176
+ end
177
+ contents << line << "\n"
182
178
  end
183
179
  end
184
- contents << line << "\n"
185
- end
186
- end
187
180
 
188
- if /\A[\s ]*\z/m =~ contents
189
- result = ""
190
- else
191
- result = config[:title] ? title << contents : contents
181
+ if /\A[\s ]*\z/m =~ contents
182
+ ""
183
+ else
184
+ config[:title] ? title << contents : contents
185
+ end
186
+ end
192
187
  end
188
+ puts pastel.blue.bold("Complete!")
193
189
  end
194
190
  end
195
191
 
196
- puts pastel.blue.bold("Complete!")
197
-
192
+ WpApp.new.run
@@ -1,62 +1,54 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- $: << File.join(File.dirname(__FILE__))
5
-
1
+ # frozen_string_literal: true
6
2
 
7
3
  require 'strscan'
8
- require 'utils'
4
+ require_relative 'utils'
9
5
 
10
6
  module Wp2txt
11
-
12
7
  # possible element types; each can later be chosen to be printed or not
13
- # :mw_heading
14
- # :mw_htable
15
- # :mw_quote
16
- # :mw_unordered
17
- # :mw_ordered
18
- # :mw_definition
19
- # :mw_pre
20
- # :mw_paragraph
21
- # :mw_comment
22
- # :mw_math
23
- # :mw_source
24
- # :mw_inputbox
25
- # :mw_template
26
- # :mw_link
27
- # :mw_summary
28
- # :mw_blank
29
- # :mw_redirect
8
+ # :mw_heading
9
+ # :mw_htable
10
+ # :mw_quote
11
+ # :mw_unordered
12
+ # :mw_ordered
13
+ # :mw_definition
14
+ # :mw_pre
15
+ # :mw_paragraph
16
+ # :mw_comment
17
+ # :mw_math
18
+ # :mw_source
19
+ # :mw_inputbox
20
+ # :mw_template
21
+ # :mw_link
22
+ # :mw_summary
23
+ # :mw_blank
24
+ # :mw_redirect
30
25
 
31
26
  # an article contains elements, each of which is [TYPE, string]
32
27
  class Article
33
-
34
28
  include Wp2txt
35
29
  attr_accessor :elements, :title, :categories
36
-
30
+
37
31
  def initialize(text, title = "", strip_tmarker = false)
38
32
  @title = title.strip
39
33
  @strip_tmarker = strip_tmarker
40
- convert_characters!(text)
41
- text.gsub!(/\|\n\n+/m){"|\n"}
42
- remove_html!(text)
43
- make_reference!(text)
44
- remove_ref!(text)
34
+ text = convert_characters(text)
35
+ text = text.gsub(/\|\n\n+/m) { "|\n" }
36
+ text = remove_html(text)
37
+ text = make_reference(text)
38
+ text = remove_ref(text)
45
39
  parse text
46
40
  end
47
-
48
- def create_element(tp, text)
49
- [tp, text]
41
+
42
+ def create_element(tpx, text)
43
+ [tpx, text]
50
44
  end
51
-
45
+
52
46
  def parse(source)
53
47
  @elements = []
54
- @categories = []
48
+ @categories = []
55
49
  mode = nil
56
- open_stack = []
57
- close_stack = []
58
50
  source.each_line do |line|
59
- matched = line.scan($category_regex)
51
+ matched = line.scan(CATEGORY_REGEX)
60
52
  if matched && !matched.empty?
61
53
  @categories += matched
62
54
  @categories.uniq!
@@ -65,108 +57,94 @@ module Wp2txt
65
57
  case mode
66
58
  when :mw_ml_template
67
59
  scanner = StringScanner.new(line)
68
- str= process_nested_structure(scanner, "{{", "}}") {""}
69
- if $ml_template_end_regex =~ str
70
- mode = nil
71
- end
60
+ str = process_nested_structure(scanner, "{{", "}}") { "" }
61
+ mode = nil if ML_TEMPLATE_END_REGEX =~ str
72
62
  @elements.last.last << line
73
63
  next
74
64
  when :mw_ml_link
75
65
  scanner = StringScanner.new(line)
76
- str= process_nested_structure(scanner, "[[", "]]") {""}
77
- if $ml_link_end_regex =~ str
78
- mode = nil
79
- end
66
+ str = process_nested_structure(scanner, "[[", "]]") { "" }
67
+ mode = nil if ML_LINK_END_REGEX =~ str
80
68
  @elements.last.last << line
81
69
  next
82
70
  when :mw_table
83
- if $in_table_regex2 =~ line
84
- mode = nil
85
- end
71
+ mode = nil if IN_TABLE_REGEX2 =~ line
86
72
  @elements.last.last << line
87
- next
73
+ next
88
74
  when :mw_inputbox
89
- if $in_inputbox_regex2 =~ line
90
- mode = nil
91
- end
75
+ mode = nil if IN_INPUTBOX_REGEX2 =~ line
92
76
  @elements.last.last << line
93
77
  next
94
78
  when :mw_source
95
- if $in_source_regex2 =~ line
96
- mode = nil
97
- end
79
+ mode = nil if IN_SOURCE_REGEX2 =~ line
98
80
  @elements.last.last << line
99
81
  next
100
82
  when :mw_math
101
- if $in_math_regex2 =~ line
102
- mode = nil
103
- end
83
+ mode = nil if IN_MATH_REGEX2 =~ line
104
84
  @elements.last.last << line
105
85
  next
106
86
  when :mw_htable
107
- if $in_html_table_regex2 =~ line
108
- mode = nil
109
- end
87
+ mode = nil if IN_HTML_TABLE_REGEX2 =~ line
110
88
  @elements.last.last << line
111
89
  next
112
90
  end
113
91
 
114
92
  case line
115
- when $isolated_template_regex
93
+ when ISOLATED_TEMPLATE_REGEX
116
94
  @elements << create_element(:mw_isolated_template, line)
117
- when $isolated_tag_regex
95
+ when ISOLATED_TAG_REGEX
118
96
  @elements << create_element(:mw_isolated_tag, line)
119
- when $blank_line_regex
120
- @elements << create_element(:mw_blank, "\n")
121
- when $redirect_regex
97
+ when BLANK_LINE_REGEX
98
+ @elements << create_element(:mw_blank, "\n")
99
+ when REDIRECT_REGEX
122
100
  @elements << create_element(:mw_redirect, line)
123
- when $in_heading_regex
124
- line = line.sub($heading_onset_regex){$1}.sub($heading_coda_regex){$1}
101
+ when IN_HEADING_REGEX
102
+ line = line.sub(HEADING_ONSET_REGEX) { $1 }.sub(HEADING_CODA_REGEX) { $1 }
125
103
  @elements << create_element(:mw_heading, "\n" + line + "\n")
126
- when $in_inputbox_regex
104
+ when IN_INPUTBOX_REGEX
127
105
  @elements << create_element(:mw_inputbox, line)
128
- when $ml_template_onset_regex
106
+ when ML_TEMPLATE_ONSET_REGEX
129
107
  @elements << create_element(:mw_ml_template, line)
130
108
  mode = :mw_ml_template
131
- when $ml_link_onset_regex
109
+ when ML_LINK_ONSET_REGEX
132
110
  @elements << create_element(:mw_ml_link, line)
133
111
  mode = :mw_ml_link
134
- when $in_inputbox_regex1
112
+ when IN_INPUTBOX_REGEX1
135
113
  mode = :mw_inputbox
136
114
  @elements << create_element(:mw_inputbox, line)
137
- when $in_source_regex
138
- @elements << create_element(:mw_source, line)
139
- when $in_source_regex1
115
+ when IN_SOURCE_REGEX
116
+ @elements << create_element(:mw_source, line)
117
+ when IN_SOURCE_REGEX1
140
118
  mode = :mw_source
141
119
  @elements << create_element(:mw_source, line)
142
- when $in_math_regex
120
+ when IN_MATH_REGEX
143
121
  @elements << create_element(:mw_math, line)
144
- when $in_math_regex1
122
+ when IN_MATH_REGEX1
145
123
  mode = :mw_math
146
124
  @elements << create_element(:mw_math, line)
147
- when $in_html_table_regex
125
+ when IN_HTML_TABLE_REGEX
148
126
  @elements << create_element(:mw_htable, line)
149
- when $in_html_table_regex1
127
+ when IN_HTML_TABLE_REGEX1
150
128
  mode = :mw_htable
151
129
  @elements << create_element(:mw_htable, line)
152
- when $in_table_regex1
130
+ when IN_TABLE_REGEX1
153
131
  mode = :mw_table
154
132
  @elements << create_element(:mw_table, line)
155
- when $in_unordered_regex
156
- line = line.sub($list_marks_regex, "") if @strip_tmarker
133
+ when IN_UNORDERED_REGEX
134
+ line = line.sub(LIST_MARKS_REGEX, "") if @strip_tmarker
157
135
  @elements << create_element(:mw_unordered, line)
158
- when $in_ordered_regex
159
- line = line.sub($list_marks_regex, "") if @strip_tmarker
136
+ when IN_ORDERED_REGEX
137
+ line = line.sub(LIST_MARKS_REGEX, "") if @strip_tmarker
160
138
  @elements << create_element(:mw_ordered, line)
161
- when $in_pre_regex
162
- line = line.sub($pre_marks_regex, "") if @strip_tmarker
139
+ when IN_PRE_REGEX
140
+ line = line.sub(PRE_MARKS_REGEX, "") if @strip_tmarker
163
141
  @elements << create_element(:mw_pre, line)
164
- when $in_definition_regex
165
- line = line.sub($def_marks_regex, "") if @strip_tmarker
142
+ when IN_DEFINITION_REGEX
143
+ line = line.sub(DEF_MARKS_REGEX, "") if @strip_tmarker
166
144
  @elements << create_element(:mw_definition, line)
167
- when $in_link_regex
145
+ when IN_LINK_REGEX
168
146
  @elements << create_element(:mw_link, line)
169
- else
147
+ else
170
148
  @elements << create_element(:mw_paragraph, "\n" + line)
171
149
  end
172
150
  end