wp2txt 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bcfd6986e262e455c100d664583b099d57ff4428
4
- data.tar.gz: 68c922b43951b7f326b0136681981a166208d0a9
3
+ metadata.gz: 80f68e6c1ac855160575f85f4d78ca378f0a1c2b
4
+ data.tar.gz: 16bbac80e7139ea63dd46baf54fb5deaf0840e59
5
5
  SHA512:
6
- metadata.gz: 294e0f8e1d2b37534ad885c617cfbd72ad72144dca6fb01231f6e2cf691a86bf58690f5dd1b2b410f8ee23eb3c74fa3f40e4ca8bbf3f3921ea78295783da5f2e
7
- data.tar.gz: 71a1b8feca5c3067ff534f0239c4c937485ff3ed8c0a6de793be0b71befd440cb5b1c2c465dc6684c558a53209ab6481dd2e60edfe7fb7cbdd9c6f07416efd24
6
+ metadata.gz: 004d26fa39aae4eb194858cf85ae8aad33f65dc556a08bbfc499ead05d49e70af4f5ba5e708354aa816cd6b38d8e9860866cefa7d6c0730058e9a186ff9eec31
7
+ data.tar.gz: c2523b8afeab165c37de028eedff36e719a2472f9440469e4041c342b08463d439351a89523d959ff28d53364c76a2af44502113bb2084eacbbc8ac14306f8a4
data/README.md CHANGED
@@ -8,7 +8,7 @@ WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compres
8
8
 
9
9
  ### Features ###
10
10
 
11
- * Convert dump files of Wikipedia of multiple languages (I hope).
11
+ * Convert dump files of Wikipedia of various languages (I hope).
12
12
  * Create output files of specified size.
13
13
  * Allow users to specify text elements to be extracted/converted (page titles, section titles, lists, and tables).
14
14
 
@@ -16,12 +16,6 @@ WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compres
16
16
 
17
17
  $ gem install wp2txt
18
18
 
19
- It is highly recommended that you also install the bzip2-ruby gem. See the following for details about the bzip2-ruby gem:
20
-
21
- [https://github.com/brianmario/bzip2-ruby](https://github.com/brianmario/bzip2-ruby)
22
-
23
- When the above gem is not found, wp2txt will try to use the bzip2 program in your command line environment. Supposedly the former option is more reliable as well as faster.
24
-
25
19
  ### Usage
26
20
 
27
21
  Obtain a Wikipedia dump file (from [here](http://dumps.wikimedia.org/backup-index.html)) with a file name such as:
@@ -32,10 +26,10 @@ where `xx` is language code such as "en (English)" or "ja (Japanese)", and `yyy
32
26
 
33
27
  Command line options are as follows:
34
28
 
35
- *CAUTION:* command line options in the current version have been drastically changed from those in versions 0.5!
29
+ *CAUTION:* Command line options in the current version have been drastically changed from previous versions.
36
30
 
37
- Usage: wp2txt [options]
38
- where [options] are:
31
+ Usage: wp2txt [options]
32
+ where [options] are:
39
33
  --input-file, -i: Wikipedia dump file with .bz2 (compressed) or
40
34
  .txt (uncompressed) format
41
35
  --output-dir, -o <s>: Output directory (default:
@@ -46,13 +40,15 @@ Command line options are as follows:
46
40
  --heading, --no-heading, -d: Show section titles in output (default: true)
47
41
  --title, --no-title, -t: Show page titles in output (default: true)
48
42
  --table, -a: Show table source code in output
49
- --template, -e: Show template specifications in output
43
+ --template, -e: leave inline template notations unmodified
50
44
  --redirect, -r: Show redirect destination
51
45
  --marker, --no-marker, -m: Show symbols prefixed to list items,
52
46
  definitions, etc. (Default: true)
53
47
  --category, -g: Show article category information
54
48
  --file-size, -f <i>: Approximate size (in MB) of each output file
55
49
  (default: 10)
50
+ --limit-recur, -u <i>: Max number of recursive call (0 to 10)
51
+ (default: 10)
56
52
  --version, -v: Print version and exit
57
53
  --help, -h: Show this message
58
54
 
@@ -71,6 +67,11 @@ Command line options are as follows:
71
67
 
72
68
  * Yoichiro Hasebe (<yohasebe@gmail.com>)
73
69
 
70
+ ### References ###
71
+
72
+ * Yoichiro HASEBE. 2006. [Method for using Wikipedia as Japanese corpus.](http://ci.nii.ac.jp/naid/110006226727) _Doshisha Studies in Language and Culture_ 9(2), 373-403.
73
+ * 長谷部陽一郎. 2006. [Wikipedia日本語版をコーパスとして用いた言語研究の手法](http://ci.nii.ac.jp/naid/110006226727). 『言語文化』9(2), 373-403.
74
+
74
75
  ### License ###
75
76
 
76
77
  This software is distributed under the MIT License. Please see the LICENSE file.
@@ -18,13 +18,11 @@ tfile_size = 10
18
18
  convert = true
19
19
  strip_tmarker = true
20
20
 
21
-
22
-
23
21
  Benchmark.bm do |x|
24
22
  x.report do
25
23
  wpconv = Wp2txt::Runner.new(parent, input_file, output_dir, tfile_size, convert, strip_tmarker)
26
24
  wpconv.extract_text do |article|
27
- title = format_wiki article.title
25
+ title = format_wiki! article.title
28
26
  title = "[[#{title}]]\n"
29
27
 
30
28
  contents = "\nCATEGORIES: "
@@ -34,25 +32,31 @@ Benchmark.bm do |x|
34
32
  article.elements.each do |e|
35
33
  case e.first
36
34
  when :mw_heading
37
- line = format_wiki(e.last)
35
+ format_wiki!(e.last)
36
+ line = e.last
38
37
  when :mw_paragraph
39
- line = format_wiki(e.last)
38
+ format_wiki!(e.last)
39
+ line = e.last
40
40
  when :mw_table, :mw_htable
41
- line = format_wiki(e.last)
41
+ format_wiki!(e.last)
42
+ line = e.last
42
43
  when :mw_pre
43
44
  line = e.last
44
45
  when :mw_quote
45
- line = format_wiki(e.last)
46
+ format_wiki!(e.last)
47
+ line = e.last
46
48
  when :mw_unordered, :mw_ordered, :mw_definition
47
- line = format_wiki(e.last)
49
+ format_wiki!(e.last)
50
+ line = e.last
48
51
  when :mw_redirect
49
- line = format_wiki(e.last)
52
+ format_wiki!(e.last)
53
+ line = e.last
50
54
  line += "\n\n"
51
55
  else
52
56
  next
53
57
  end
54
58
  contents += line
55
- contents = remove_templates(contents)
59
+ remove_templates!(contents)
56
60
  end
57
61
 
58
62
  ##### cleanup #####
data/bin/wp2txt CHANGED
@@ -31,39 +31,41 @@ EOS
31
31
  opt :heading, "Show section titles in output", :default => true, :short => "-d"
32
32
  opt :title, "Show page titles in output", :default => true
33
33
  opt :table, "Show table source code in output", :default => false
34
- opt :template, "Show template specifications in output", :default => false
34
+ opt :template, "leave inline template notations unmodified", :default => false
35
35
  opt :redirect, "Show redirect destination", :default => false
36
36
  opt :marker, "Show symbols prefixed to list items, definitions, etc.", :default => true
37
37
  opt :category, "Show article category information", :default => false
38
- opt :file_size, "Approximate size (in MB) of each output file", :default => 10
38
+ opt :file_size, "Approximate size (in MB) of each output file", :default => 10
39
+ opt :limit_recur, "Max number of recursive call (0 to 10)", :default => 10
39
40
  end
40
41
  Trollop::die :size, "must be larger than 0" unless opts[:file_size] >= 0
41
42
  Trollop::die :output_dir, "must exist" unless File.exist?(opts[:output_dir])
43
+ Trollop::die :limit_recur, "must be 10 or smaller" if opts[:limit_recur] > 10
42
44
 
43
45
  input_file = ARGV[0]
44
46
  output_dir = opts[:output_dir]
45
47
  tfile_size = opts[:file_size]
48
+ limit_recur = opts[:limit_recur]
46
49
  convert = opts[:convert]
47
50
  strip_tmarker = opts[:marker] ? false : true
48
- opt_array = [:title, :list, :heading, :table, :template, :redirect]
51
+ opt_array = [:title, :list, :heading, :table, :redirect]
52
+ $leave_template = true if opts[:template]
49
53
  config = {}
50
54
  opt_array.each do |opt|
51
55
  config[opt] = opts[opt]
52
56
  end
53
57
 
54
- # a "parent" is either commandline progress bar or
55
- # a gui window (not available for now)
56
58
  parent = Wp2txt::CmdProgbar.new
57
- wpconv = Wp2txt::Runner.new(parent, input_file, output_dir, tfile_size, convert, strip_tmarker)
59
+ wpconv = Wp2txt::Runner.new(parent, input_file, output_dir, tfile_size, convert, strip_tmarker, limit_recur)
58
60
 
59
61
  wpconv.extract_text do |article|
60
- title = format_wiki article.title
61
- title = "[[#{title}]]\n"
62
+ format_wiki!(article.title)
63
+ title = "[[#{article.title}]]\n"
62
64
 
63
65
  if opts[:category] && !article.categories.empty?
64
66
  contents = "\nCATEGORIES: "
65
- contents += article.categories.join(", ")
66
- contents += "\n\n"
67
+ contents << article.categories.join(", ")
68
+ contents << "\n\n"
67
69
  else
68
70
  contents = ""
69
71
  end
@@ -72,44 +74,62 @@ wpconv.extract_text do |article|
72
74
  case e.first
73
75
  when :mw_heading
74
76
  next if !config[:heading]
75
- line = format_wiki(e.last)
76
- line += "+HEADING+" if $DEBUG_MODE
77
+ format_wiki!(e.last)
78
+ line = e.last
79
+ line << "+HEADING+" if $DEBUG_MODE
77
80
  when :mw_paragraph
78
81
  # next if !config[:paragraph]
79
- line = format_wiki(e.last)
80
- line += "+PARAGRAPH+" if $DEBUG_MODE
82
+ format_wiki!(e.last)
83
+ line = e.last
84
+ line << "+PARAGRAPH+" if $DEBUG_MODE
81
85
  when :mw_table, :mw_htable
82
86
  next if !config[:table]
83
- line = format_wiki(e.last)
84
- line += "+TABLE+" if $DEBUG_MODE
87
+ format_wiki!(e.last)
88
+ line = e.last
89
+ line << "+TABLE+" if $DEBUG_MODE
85
90
  when :mw_pre
86
91
  next if !config[:pre]
87
92
  line = e.last
88
- line += "+PRE+" if $DEBUG_MODE
93
+ line << "+PRE+" if $DEBUG_MODE
89
94
  when :mw_quote
90
95
  # next if !config[:quote]
91
- line = format_wiki(e.last)
92
- line += "+QUOTE+" if $DEBUG_MODE
96
+ format_wiki!(e.last)
97
+ line = e.last
98
+ line << "+QUOTE+" if $DEBUG_MODE
93
99
  when :mw_unordered, :mw_ordered, :mw_definition
94
100
  next if !config[:list]
95
- line = format_wiki(e.last)
96
- line += "+LIST+" if $DEBUG_MODE
101
+ format_wiki!(e.last)
102
+ line = e.last
103
+ line << "+LIST+" if $DEBUG_MODE
97
104
  when :mw_redirect
98
105
  next if !config[:redirect]
99
- line = format_wiki(e.last)
100
- line += "+REDIRECT+" if $DEBUG_MODE
101
- line += "\n\n"
106
+ format_wiki!(e.last)
107
+ line = e.last
108
+ line << "+REDIRECT+" if $DEBUG_MODE
109
+ line << "\n\n"
102
110
  else
103
111
  if $DEBUG_MODE
104
- line = format_wiki(e.last)
105
- line += "+OTHER+"
112
+ format_wiki!(e.last)
113
+ line = e.last
114
+ line << "+OTHER+"
106
115
  else
107
116
  next
108
117
  end
109
118
  end
110
- contents += line
111
- contents = remove_templates(contents) unless config[:template]
119
+ contents << line
112
120
  end
121
+
122
+ remove_directive!(contents)
123
+ remove_emphasis!(contents)
124
+ mndash!(contents)
125
+ make_reference!(contents)
126
+ format_ref!(contents)
127
+ remove_hr!(contents)
128
+ remove_tag!(contents)
129
+ special_chr!(contents)
130
+
131
+ correct_inline_template!(contents) unless $leave_template
132
+ remove_templates!(contents) unless $leave_template
113
133
 
114
134
  ##### cleanup #####
115
135
  if /\A\s*\z/m =~ contents
@@ -3,14 +3,18 @@
3
3
 
4
4
  $: << File.join(File.dirname(__FILE__))
5
5
 
6
- require "rubygems"
7
- require "bundler/setup"
8
- require "nokogiri"
6
+ # require "rubygems"
7
+ # require "bundler/setup"
9
8
 
9
+ require "Nokogiri"
10
+ # require "oga"
11
+ # require "ox"
12
+
13
+ require 'pp'
10
14
  require "wp2txt/article"
11
15
  require "wp2txt/utils"
12
- require "wp2txt/mw_api"
13
16
  require "wp2txt/progressbar"
17
+ # require "wp2txt/mw_api"
14
18
 
15
19
  begin
16
20
  require "bzip2-ruby"
@@ -25,9 +29,7 @@ module Wp2txt
25
29
 
26
30
  include Wp2txt
27
31
 
28
- # attr_accessor :pause_flag, :stop_flag, :outfiles, :convert
29
-
30
- def initialize(parent, input_file, output_dir = ".", tfile_size = 10, convert = true, strip_tmarker = false)
32
+ def initialize(parent, input_file, output_dir = ".", tfile_size = 10, convert = true, strip_tmarker = false, limit_recur = 10)
31
33
  @parent = parent
32
34
  @fp = nil
33
35
 
@@ -36,6 +38,9 @@ module Wp2txt
36
38
  @tfile_size = tfile_size
37
39
  @convert = convert
38
40
  @strip_tmarker = strip_tmarker
41
+
42
+ #max number of recursive calls (global variable)
43
+ $limit_recur = limit_recur
39
44
  end
40
45
 
41
46
  def file_size(file)
@@ -111,7 +116,9 @@ module Wp2txt
111
116
  else
112
117
  file = IO.popen("bzip2 -c -d #{@input_file}")
113
118
  end
119
+ @parent.msg("Preparing ... This may take several minutes or more ", 0)
114
120
  @infile_size = file_size(file)
121
+ @parent.msg("... Done.", 1)
115
122
  file.close # try to reopen since rewind method is unavailable
116
123
  if RUBY_PLATFORM.index("win32")
117
124
  file = IO.popen("bunzip2.exe -c #{@input_file}")
@@ -237,13 +244,41 @@ module Wp2txt
237
244
  while page = get_page
238
245
  xmlns = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.5/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.5/ http://www.mediawiki.org/xml/export-0.5.xsd" version="0.5" xml:lang="en">' + "\n"
239
246
  xml = xmlns + page + "</mediawiki>"
247
+
240
248
  input = Nokogiri::XML(xml, nil, 'UTF-8')
241
- page = input.xpath("//xmlns:text").first
249
+ page = input.xpath("//xmlns:text").first
242
250
  pp_title = page.parent.parent.at_css "title"
243
251
  title = pp_title.content
244
-
245
- next if /\:/ =~ title
252
+ next if /\:/ =~ title
246
253
  text = page.content
254
+
255
+ # input = Oga.parse_xml(xml)
256
+ # page = input.xpath("//xmlns:text").first
257
+ # title = page.parent.parent.xpath("//xmlns:title").first.text
258
+ # next if /\:/ =~ title
259
+ # text = page.text
260
+
261
+ # input = Ox.load(xml, :encoding => "UTF-8")
262
+ # title = ""
263
+ # text = ""
264
+ # input.nodes.first.nodes.each do |n|
265
+ # if n.name == "title"
266
+ # title = n.nodes.first
267
+ # if /\:/ =~ title
268
+ # title = ""
269
+ # break
270
+ # end
271
+ # elsif n.name == "revision"
272
+ # n.nodes.each do |o|
273
+ # if o.name == "text"
274
+ # text = o.nodes.first
275
+ # break
276
+ # end
277
+ # end
278
+ # end
279
+ # end
280
+ # next if title == "" || text == ""
281
+
247
282
  # remove all comment texts
248
283
  # and insert as many number of new line chars included in
249
284
  # each comment instead
@@ -256,7 +291,7 @@ module Wp2txt
256
291
  end
257
292
  end
258
293
 
259
- @count ||= 0;@count += 1;
294
+ @count ||= 0;@count += 1;
260
295
 
261
296
  article = Article.new(text, title, @strip_tmarker)
262
297
  output_text += block.call(article)
@@ -3,77 +3,37 @@
3
3
 
4
4
  $: << File.join(File.dirname(__FILE__))
5
5
 
6
+
6
7
  require 'strscan'
7
8
  require 'utils'
8
9
 
9
10
  module Wp2txt
10
11
 
11
12
  # possible element type, which could be later chosen to print or not to print
12
- # :mw_heading
13
- # :mw_htable
14
- # :mw_quote
15
- # :mw_unordered
16
- # :mw_ordered
17
- # :mw_definition
18
- # :mw_pre
19
- # :mw_paragraph
20
- # :mw_comment
21
- # :mw_math
22
- # :mw_source
23
- # :mw_inputbox
24
- # :mw_template
25
- # :mw_link
26
- # :mw_summary
27
- # :mw_blank
28
- # :mw_redirect
29
-
13
+ # :mw_heading
14
+ # :mw_htable
15
+ # :mw_quote
16
+ # :mw_unordered
17
+ # :mw_ordered
18
+ # :mw_definition
19
+ # :mw_pre
20
+ # :mw_paragraph
21
+ # :mw_comment
22
+ # :mw_math
23
+ # :mw_source
24
+ # :mw_inputbox
25
+ # :mw_template
26
+ # :mw_link
27
+ # :mw_summary
28
+ # :mw_blank
29
+ # :mw_redirect
30
+
30
31
  # an article contains elements, each of which is [TYPE, string]
31
32
  class Article
32
33
 
33
34
  include Wp2txt
34
35
  attr_accessor :elements, :title, :categories
35
36
 
36
- # class variables to save resource for generating regexps
37
- # those with a trailing number 1 represent opening tag/markup
38
- # those with a trailing number 2 represent closing tag/markup
39
- # those without a trailing number contain both opening/closing tags/markups
40
-
41
- @@in_template_regex = Regexp.new('^\s*\{\{[^\}]+\}\}\s*$')
42
- @@in_link_regex = Regexp.new('^\s*\[.*\]\s*$')
43
-
44
- @@in_inputbox_regex = Regexp.new('<inputbox>.*?<\/inputbox>')
45
- @@in_inputbox_regex1 = Regexp.new('<inputbox>')
46
- @@in_inputbox_regex2 = Regexp.new('<\/inputbox>')
47
-
48
- @@in_source_regex = Regexp.new('<source.*?>.*?<\/source>')
49
- @@in_source_regex1 = Regexp.new('<source.*?>')
50
- @@in_source_regex2 = Regexp.new('<\/source>')
51
-
52
- @@in_math_regex = Regexp.new('<math.*?>.*?<\/math>')
53
- @@in_math_regex1 = Regexp.new('<math.*?>')
54
- @@in_math_regex2 = Regexp.new('<\/math>')
55
-
56
- @@in_heading_regex = Regexp.new('^=+.*?=+$')
57
-
58
- @@in_html_table_regex = Regexp.new('<table.*?><\/table>')
59
- @@in_html_table_regex1 = Regexp.new('<table\b')
60
- @@in_html_table_regex2 = Regexp.new('<\/\s*table>')
61
-
62
- @@in_table_regex1 = Regexp.new('^\s*\{\|')
63
- @@in_table_regex2 = Regexp.new('^\|\}.*?$')
64
-
65
- @@in_unordered_regex = Regexp.new('^\*')
66
- @@in_ordered_regex = Regexp.new('^\#')
67
- @@in_pre_regex = Regexp.new('^ ')
68
- @@in_definition_regex = Regexp.new('^[\;\:]')
69
-
70
- @@blank_line_regex = Regexp.new('^\s*$')
71
-
72
- @@redirect_regex = Regexp.new('#(?:REDIRECT|転送)\s+\[\[(.+)\]\]', Regexp::IGNORECASE)
73
-
74
- category_patterns = ["Category", "Categoria"].join("|")
75
- @@category_regex = Regexp.new('[\{\[\|\b](?:' + category_patterns + ')\:(.*?)[\}\]\|\b]', Regexp::IGNORECASE)
76
-
77
37
  def initialize(text, title = "", strip_tmarker = false)
78
38
  @title = title.strip
79
39
  @strip_tmarker = strip_tmarker
@@ -91,39 +51,39 @@ module Wp2txt
91
51
  open_stack = []
92
52
  close_stack = []
93
53
  source.each_line do |line|
94
- matched = line.scan(@@category_regex)
54
+ matched = line.scan($category_regex)
95
55
  if matched && !matched.empty?
96
56
  @categories += matched
97
- @categories = @categories.uniq
57
+ @categories.uniq!
98
58
  end
99
59
 
100
60
  case mode
101
61
  when :mw_table
102
- if @@in_table_regex2 =~ line
62
+ if $in_table_regex2 =~ line
103
63
  mode = nil
104
64
  end
105
65
  @elements.last.last << line
106
66
  next
107
67
  when :mw_inputbox
108
- if @@in_inputbox_regex2 =~ line
68
+ if $in_inputbox_regex2 =~ line
109
69
  mode = nil
110
70
  end
111
71
  @elements.last.last << line
112
72
  next
113
73
  when :mw_source
114
- if @@in_source_regex2 =~ line
74
+ if $in_source_regex2 =~ line
115
75
  mode = nil
116
76
  end
117
77
  @elements.last.last << line
118
78
  next
119
79
  when :mw_math
120
- if @@in_math_regex2 =~ line
80
+ if $in_math_regex2 =~ line
121
81
  mode = nil
122
82
  end
123
83
  @elements.last.last << line
124
84
  next
125
85
  when :mw_htable
126
- if @@in_html_table_regex2 =~ line
86
+ if $in_html_table_regex2 =~ line
127
87
  mode = nil
128
88
  end
129
89
  @elements.last.last << line
@@ -131,51 +91,51 @@ module Wp2txt
131
91
  end
132
92
 
133
93
  case line
134
- when @@blank_line_regex
94
+ when $blank_line_regex
135
95
  @elements << create_element(:mw_blank, "\n")
136
- when @@redirect_regex
96
+ when $redirect_regex
137
97
  @elements << create_element(:mw_redirect, line)
138
- when @@in_template_regex
98
+ when $in_template_regex
139
99
  @elements << create_element(:mw_template, line)
140
- when @@in_heading_regex
141
- line = line.sub(/^(\=+)\s+/){$1}.sub(/\s+(\=+)$/){$1}
100
+ when $in_heading_regex
101
+ line = line.sub($heading_onset_regex){$1}.sub($heading_coda_regex){$1}
142
102
  @elements << create_element(:mw_heading, "\n" + line + "\n")
143
- when @@in_inputbox_regex
103
+ when $in_inputbox_regex
144
104
  @elements << create_element(:mw_inputbox, line)
145
- when @@in_inputbox_regex1
105
+ when $in_inputbox_regex1
146
106
  mode = :mw_inputbox
147
107
  @elements << create_element(:mw_inputbox, line)
148
- when @@in_source_regex
108
+ when $in_source_regex
149
109
  @elements << create_element(:mw_source, line)
150
- when @@in_source_regex1
110
+ when $in_source_regex1
151
111
  mode = :mw_source
152
112
  @elements << create_element(:mw_source, line)
153
- when @@in_math_regex
113
+ when $in_math_regex
154
114
  @elements << create_element(:mw_math, line)
155
- when @@in_math_regex1
115
+ when $in_math_regex1
156
116
  mode = :mw_math
157
117
  @elements << create_element(:mw_math, line)
158
- when @@in_html_table_regex
118
+ when $in_html_table_regex
159
119
  @elements << create_element(:mw_htable, line)
160
- when @@in_html_table_regex1
120
+ when $in_html_table_regex1
161
121
  mode = :mw_htable
162
122
  @elements << create_element(:mw_htable, line)
163
- when @@in_table_regex1
123
+ when $in_table_regex1
164
124
  mode = :mw_table
165
125
  @elements << create_element(:mw_table, line)
166
- when @@in_unordered_regex
167
- line = line.sub(/\A[\*\#\;\:\ ]+/, "") if @strip_tmarker
126
+ when $in_unordered_regex
127
+ line = line.sub($list_marks_regex, "") if @strip_tmarker
168
128
  @elements << create_element(:mw_unordered, line)
169
- when @@in_ordered_regex
170
- line = line.sub(/\A[\*\#\;\:\ ]+/, "") if @strip_tmarker
129
+ when $in_ordered_regex
130
+ line = line.sub($list_marks_regex, "") if @strip_tmarker
171
131
  @elements << create_element(:mw_ordered, line)
172
- when @@in_pre_regex
173
- line = line.sub(/\A\^\ /, "") if @strip_tmarker
132
+ when $in_pre_regex
133
+ line = line.sub($pre_marks_regex, "") if @strip_tmarker
174
134
  @elements << create_element(:mw_pre, line)
175
- when @@in_definition_regex
176
- line = line.sub(/\A[\;\:\ ]+/, "") if @strip_tmarker
135
+ when $in_definition_regex
136
+ line = line.sub($def_marks_regex, "") if @strip_tmarker
177
137
  @elements << create_element(:mw_definition, line)
178
- when @@in_link_regex
138
+ when $in_link_regex
179
139
  @elements << create_element(:mw_link, line)
180
140
  else
181
141
  @elements << create_element(:mw_paragraph, line)