infoboxer 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d9f535874913b90dc945c37405ed54aff97a53a
4
- data.tar.gz: a25dc355939431517d55ffe1d0aa0cbcbef0fff7
3
+ metadata.gz: 69ee012f6cd8bb3e923289a4a921e81798ffd2aa
4
+ data.tar.gz: 42686bced509c310051ba7bc895c68d26a96bd2f
5
5
  SHA512:
6
- metadata.gz: b5fc7579e52f108b56de01e9ca6bcd3b8c54eaec08a4003d15553f2b91318052c6b6f2d3bfac76ff7a913651d9e73925e7c9e87e2ca0c36df26a36107789e735
7
- data.tar.gz: 7692f32da19229dbb5e03e65f8be4db6e4cd605ba5c8d9e2f95200734a288ba8ff8ccb1f518bd365a91c82ead2c1639c495f81538ad983d1e9c9f8f4434e9b2f
6
+ metadata.gz: cc237b46fe0f59a2ba04d3b53da4a7361cd974e50e1d18f688a077093da664a7d205dd363ddc2622a9af4c1a6f1168315d90fb6867e65c0a61bd60e5e453e81a
7
+ data.tar.gz: d844446421cffd8e7f6a8fe885f0f1918c772f9e2e7c1d289a1e90b544fde9b3c2b41c8956df7338aab9ad2ae5ca7f5ec9d67fb2d3f16da34367ed5b96a5aa22
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.2.2 (2016-01-03)
4
+
5
+ Fixes:
6
+ * more sophisticated table parsing;
7
+ * empty `<nowiki/>` is parsed properly;
8
+ * inline unclosed markup inside wikilinks works;
9
+ * `MediaWiki::Traits` can now be continued in several places.
10
+
3
11
  ## 0.2.1 (2015-12-21)
4
12
 
5
13
  * `infoboxer` binary properly registered.
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Build Status](https://travis-ci.org/molybdenum-99/infoboxer.svg?branch=master)](https://travis-ci.org/molybdenum-99/infoboxer)
5
5
  [![Coverage Status](https://coveralls.io/repos/molybdenum-99/infoboxer/badge.svg?branch=master&service=github)](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
6
6
  [![Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer/badges/gpa.svg)](https://codeclimate.com/github/molybdenum-99/infoboxer)
7
- [![Molybdenum-99 Gitter](https://badges.gitter.im/molybdenum-99.png)](https://gitter.im/molybdenum-99)
7
+ [![Infoboxer Gitter](https://badges.gitter.im/molybdenum-99/infoboxer.svg)](https://gitter.im/molybdenum-99/infoboxer)
8
8
 
9
9
  **Infoboxer** is pure-Ruby Wikipedia (and generic MediaWiki) client and
10
10
  parser, targeting information extraction (hence the name).
@@ -59,7 +59,8 @@ module Infoboxer
59
59
  # [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
60
60
  # for example implementation.
61
61
  def for(domain, &block)
62
- Class.new(self, &block).domain(domain)
62
+ Traits.domains[domain].tap{|c| c && c.instance_eval(&block)} ||
63
+ Class.new(self, &block).domain(domain)
63
64
  end
64
65
 
65
66
  # @private
@@ -33,8 +33,10 @@ module Infoboxer
33
33
  nodes = Nodes[]
34
34
  guarded_loop do
35
35
  # FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
36
- if @context.inline_eol_sign
36
+ if @context.inline_eol_sign == /^\]/
37
37
  chunk = @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
38
+ elsif @context.inline_eol_sign == /^\]\]/
39
+ chunk = @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
38
40
  else
39
41
  chunk = @context.scan_until(re.short_inline_until_cache[until_pattern])
40
42
  end
@@ -97,7 +99,7 @@ module Infoboxer
97
99
  when '{{'
98
100
  template
99
101
  when /<nowiki([^>]*)>/
100
- nowiki
102
+ nowiki($1)
101
103
  when /<ref([^>]*)\/>/
102
104
  reference($1, true)
103
105
  when /<ref([^>]*)>/
@@ -114,7 +116,12 @@ module Infoboxer
114
116
  # [[a|b]]
115
117
  def wikilink
116
118
  link = @context.scan_continued_until(/\||\]\]/)
117
- caption = inline(/\]\]/) if @context.matched == '|'
119
+ if @context.matched == '|'
120
+ @context.push_eol_sign(/^\]\]/)
121
+ caption = inline(/\]\]/)
122
+ @context.pop_eol_sign
123
+ end
124
+
118
125
  Wikilink.new(link, caption)
119
126
  end
120
127
 
@@ -136,8 +143,12 @@ module Infoboxer
136
143
  Ref.new(children, parse_params(param_str))
137
144
  end
138
145
 
139
- def nowiki
140
- Text.new(@context.scan_continued_until(/<\/nowiki>/))
146
+ def nowiki(tag_rest)
147
+ if tag_rest.end_with?('/')
148
+ Text.new('')
149
+ else
150
+ Text.new(@context.scan_continued_until(/<\/nowiki>/))
151
+ end
141
152
  end
142
153
  end
143
154
 
@@ -52,10 +52,10 @@ module Infoboxer
52
52
  table_template(table)
53
53
 
54
54
  when nil
55
- @context.fail!("End of input before table ended!")
55
+ return false
56
56
 
57
57
  else
58
- table_cell_cont(table)
58
+ return table_cell_cont(table)
59
59
  end
60
60
  true # should continue parsing
61
61
  end
@@ -103,29 +103,33 @@ module Infoboxer
103
103
  end
104
104
  end
105
105
 
106
- # On-the-fly TableCaption creation handles (real life) case, when
107
- # table has "HTML caption":
108
- # {|
109
- # <caption>....</caption>
110
- #
111
- # Solution is NOT elegant or semantically "right", yet it works.
112
- # Somehow.
106
+ # Good news, everyone! Table can be IMPLICITLY closed when it's
107
+ # not "cell" context.
113
108
  #
109
+ # Unless it's empty row, which is just skipped.
114
110
  def table_cell_cont(table)
115
111
  container = case (last = table.children.last)
116
- when TableRow
117
- cell = last.children.last
118
- cell.is_a?(BaseCell) ? cell : TableCaption.new
119
- when TableCaption
120
- last
121
- when nil
122
- TableCaption.new
123
- else
124
- @context.fail!("Multiline cell inside #{last}")
112
+ when TableRow
113
+ last.children.last
114
+ when TableCaption
115
+ last
116
+ else
117
+ nil
118
+ end
119
+
120
+ if !container
121
+ # return "table not continued" unless row is empty
122
+ if @context.current.empty?
123
+ return true
124
+ else
125
+ @context.prev!
126
+ return false
127
+ end
125
128
  end
126
-
129
+
127
130
  container.push_children(paragraph(/^\s*([|!]|{\|)/))
128
131
  table.push_children(container) unless container.parent
132
+ true
129
133
  end
130
134
  end
131
135
  end
@@ -19,13 +19,20 @@ module Infoboxer
19
19
  }}
20
20
  )]x
21
21
 
22
- INLINE_EOL_BR = %r[(?= # if we have ahead... (not scanned, just checked
22
+ INLINE_EOL_BRACK = %r[(?= # if we have ahead... (not scanned, just checked
23
23
  </ref> | # <ref> closed
24
24
  }} | # or template closed
25
25
  (?<!\])\](?!\]) # or ext.link closed,
26
26
  # the madness with look-ahead/behind means "match single bracket but not double"
27
27
  )]x
28
28
 
29
+ # FIXME: ok, NOW it's officially ridiculous
30
+ INLINE_EOL_BRACK2 = %r[(?= # if we have ahead... (not scanned, just checked
31
+ </ref> | # <ref> closed
32
+ }} | # or template closed
33
+ \]\] # or int.link closed
34
+ )]x
35
+
29
36
 
30
37
  def make_regexps
31
38
  {
@@ -38,7 +45,10 @@ module Infoboxer
38
45
  h[r] = Regexp.union(*[r, INLINE_EOL, FORMATTING, /$/].compact.uniq)
39
46
  },
40
47
  short_inline_until_cache_brackets: Hash.new{|h, r|
41
- h[r] = Regexp.union(*[r, INLINE_EOL_BR, FORMATTING, /$/].compact.uniq)
48
+ h[r] = Regexp.union(*[r, INLINE_EOL_BRACK, FORMATTING, /$/].compact.uniq)
49
+ },
50
+ short_inline_until_cache_brackets2: Hash.new{|h, r|
51
+ h[r] = Regexp.union(*[r, INLINE_EOL_BRACK2, FORMATTING, /$/].compact.uniq)
42
52
  }
43
53
 
44
54
  }
@@ -25,13 +25,13 @@ module Infoboxer
25
25
  #
26
26
  # FIXME: it can easily be several table heading rows
27
27
  def heading_row
28
- rows.first.children.all?(&call(matches?: TableHeading)) ?
28
+ rows.first && rows.first.children.all?(&call(matches?: TableHeading)) ?
29
29
  rows.first : nil
30
30
  end
31
31
 
32
32
  # For now, returns all table rows except {#heading_row}
33
33
  def body_rows
34
- rows.first.children.all?(&call(matches?: TableHeading)) ?
34
+ rows.first && rows.first.children.all?(&call(matches?: TableHeading)) ?
35
35
  rows[1..-1] :
36
36
  rows
37
37
  end
@@ -2,6 +2,6 @@
2
2
  module Infoboxer
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- PATCH = 1
5
+ PATCH = 2
6
6
  VERSION = [MAJOR, MINOR, PATCH].join('.')
7
7
  end