infoboxer 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d9f535874913b90dc945c37405ed54aff97a53a
4
- data.tar.gz: a25dc355939431517d55ffe1d0aa0cbcbef0fff7
3
+ metadata.gz: 69ee012f6cd8bb3e923289a4a921e81798ffd2aa
4
+ data.tar.gz: 42686bced509c310051ba7bc895c68d26a96bd2f
5
5
  SHA512:
6
- metadata.gz: b5fc7579e52f108b56de01e9ca6bcd3b8c54eaec08a4003d15553f2b91318052c6b6f2d3bfac76ff7a913651d9e73925e7c9e87e2ca0c36df26a36107789e735
7
- data.tar.gz: 7692f32da19229dbb5e03e65f8be4db6e4cd605ba5c8d9e2f95200734a288ba8ff8ccb1f518bd365a91c82ead2c1639c495f81538ad983d1e9c9f8f4434e9b2f
6
+ metadata.gz: cc237b46fe0f59a2ba04d3b53da4a7361cd974e50e1d18f688a077093da664a7d205dd363ddc2622a9af4c1a6f1168315d90fb6867e65c0a61bd60e5e453e81a
7
+ data.tar.gz: d844446421cffd8e7f6a8fe885f0f1918c772f9e2e7c1d289a1e90b544fde9b3c2b41c8956df7338aab9ad2ae5ca7f5ec9d67fb2d3f16da34367ed5b96a5aa22
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.2.2 (2016-01-03)
4
+
5
+ Fixes:
6
+ * more sophisticated table parsing;
7
+ * empty `<nowiki/>` is parsed properly;
8
+ * inline unclosed markup inside wikilinks works;
9
+ * `MediaWiki::Traits` can now be continued in several places.
10
+
3
11
  ## 0.2.1 (2015-12-21)
4
12
 
5
13
  * `infoboxer` binary properly registered.
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Build Status](https://travis-ci.org/molybdenum-99/infoboxer.svg?branch=master)](https://travis-ci.org/molybdenum-99/infoboxer)
5
5
  [![Coverage Status](https://coveralls.io/repos/molybdenum-99/infoboxer/badge.svg?branch=master&service=github)](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
6
6
  [![Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer/badges/gpa.svg)](https://codeclimate.com/github/molybdenum-99/infoboxer)
7
- [![Molybdenum-99 Gitter](https://badges.gitter.im/molybdenum-99.png)](https://gitter.im/molybdenum-99)
7
+ [![Infoboxer Gitter](https://badges.gitter.im/molybdenum-99/infoboxer.svg)](https://gitter.im/molybdenum-99/infoboxer)
8
8
 
9
9
  **Infoboxer** is pure-Ruby Wikipedia (and generic MediaWiki) client and
10
10
  parser, targeting information extraction (hence the name).
@@ -59,7 +59,8 @@ module Infoboxer
59
59
  # [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
60
60
  # for example implementation.
61
61
  def for(domain, &block)
62
- Class.new(self, &block).domain(domain)
62
+ Traits.domains[domain].tap{|c| c && c.instance_eval(&block)} ||
63
+ Class.new(self, &block).domain(domain)
63
64
  end
64
65
 
65
66
  # @private
@@ -33,8 +33,10 @@ module Infoboxer
33
33
  nodes = Nodes[]
34
34
  guarded_loop do
35
35
  # FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
36
- if @context.inline_eol_sign
36
+ if @context.inline_eol_sign == /^\]/
37
37
  chunk = @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
38
+ elsif @context.inline_eol_sign == /^\]\]/
39
+ chunk = @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
38
40
  else
39
41
  chunk = @context.scan_until(re.short_inline_until_cache[until_pattern])
40
42
  end
@@ -97,7 +99,7 @@ module Infoboxer
97
99
  when '{{'
98
100
  template
99
101
  when /<nowiki([^>]*)>/
100
- nowiki
102
+ nowiki($1)
101
103
  when /<ref([^>]*)\/>/
102
104
  reference($1, true)
103
105
  when /<ref([^>]*)>/
@@ -114,7 +116,12 @@ module Infoboxer
114
116
  # [[a|b]]
115
117
  def wikilink
116
118
  link = @context.scan_continued_until(/\||\]\]/)
117
- caption = inline(/\]\]/) if @context.matched == '|'
119
+ if @context.matched == '|'
120
+ @context.push_eol_sign(/^\]\]/)
121
+ caption = inline(/\]\]/)
122
+ @context.pop_eol_sign
123
+ end
124
+
118
125
  Wikilink.new(link, caption)
119
126
  end
120
127
 
@@ -136,8 +143,12 @@ module Infoboxer
136
143
  Ref.new(children, parse_params(param_str))
137
144
  end
138
145
 
139
- def nowiki
140
- Text.new(@context.scan_continued_until(/<\/nowiki>/))
146
+ def nowiki(tag_rest)
147
+ if tag_rest.end_with?('/')
148
+ Text.new('')
149
+ else
150
+ Text.new(@context.scan_continued_until(/<\/nowiki>/))
151
+ end
141
152
  end
142
153
  end
143
154
 
@@ -52,10 +52,10 @@ module Infoboxer
52
52
  table_template(table)
53
53
 
54
54
  when nil
55
- @context.fail!("End of input before table ended!")
55
+ return false
56
56
 
57
57
  else
58
- table_cell_cont(table)
58
+ return table_cell_cont(table)
59
59
  end
60
60
  true # should continue parsing
61
61
  end
@@ -103,29 +103,33 @@ module Infoboxer
103
103
  end
104
104
  end
105
105
 
106
- # On-the-fly TableCaption creation handles (real life) case, when
107
- # table has "HTML caption":
108
- # {|
109
- # <caption>....</caption>
110
- #
111
- # Solution is NOT elegant or semantically "right", yet it works.
112
- # Somehow.
106
+ # Good news, everyone! Table can be IMPLICITLY closed when it's
107
+ # not "cell" context.
113
108
  #
109
+ # Unless it's empty row, which is just skipped.
114
110
  def table_cell_cont(table)
115
111
  container = case (last = table.children.last)
116
- when TableRow
117
- cell = last.children.last
118
- cell.is_a?(BaseCell) ? cell : TableCaption.new
119
- when TableCaption
120
- last
121
- when nil
122
- TableCaption.new
123
- else
124
- @context.fail!("Multiline cell inside #{last}")
112
+ when TableRow
113
+ last.children.last
114
+ when TableCaption
115
+ last
116
+ else
117
+ nil
118
+ end
119
+
120
+ if !container
121
+ # return "table not continued" unless row is empty
122
+ if @context.current.empty?
123
+ return true
124
+ else
125
+ @context.prev!
126
+ return false
127
+ end
125
128
  end
126
-
129
+
127
130
  container.push_children(paragraph(/^\s*([|!]|{\|)/))
128
131
  table.push_children(container) unless container.parent
132
+ true
129
133
  end
130
134
  end
131
135
  end
@@ -19,13 +19,20 @@ module Infoboxer
19
19
  }}
20
20
  )]x
21
21
 
22
- INLINE_EOL_BR = %r[(?= # if we have ahead... (not scanned, just checked
22
+ INLINE_EOL_BRACK = %r[(?= # if we have ahead... (not scanned, just checked
23
23
  </ref> | # <ref> closed
24
24
  }} | # or template closed
25
25
  (?<!\])\](?!\]) # or ext.link closed,
26
26
  # the madness with look-ahead/behind means "match single bracket but not double"
27
27
  )]x
28
28
 
29
+ # FIXME: ok, NOW it's officially ridiculous
30
+ INLINE_EOL_BRACK2 = %r[(?= # if we have ahead... (not scanned, just checked
31
+ </ref> | # <ref> closed
32
+ }} | # or template closed
33
+ \]\] # or int.link closed
34
+ )]x
35
+
29
36
 
30
37
  def make_regexps
31
38
  {
@@ -38,7 +45,10 @@ module Infoboxer
38
45
  h[r] = Regexp.union(*[r, INLINE_EOL, FORMATTING, /$/].compact.uniq)
39
46
  },
40
47
  short_inline_until_cache_brackets: Hash.new{|h, r|
41
- h[r] = Regexp.union(*[r, INLINE_EOL_BR, FORMATTING, /$/].compact.uniq)
48
+ h[r] = Regexp.union(*[r, INLINE_EOL_BRACK, FORMATTING, /$/].compact.uniq)
49
+ },
50
+ short_inline_until_cache_brackets2: Hash.new{|h, r|
51
+ h[r] = Regexp.union(*[r, INLINE_EOL_BRACK2, FORMATTING, /$/].compact.uniq)
42
52
  }
43
53
 
44
54
  }
@@ -25,13 +25,13 @@ module Infoboxer
25
25
  #
26
26
  # FIXME: it can easily be several table heading rows
27
27
  def heading_row
28
- rows.first.children.all?(&call(matches?: TableHeading)) ?
28
+ rows.first && rows.first.children.all?(&call(matches?: TableHeading)) ?
29
29
  rows.first : nil
30
30
  end
31
31
 
32
32
  # For now, returns all table rows except {#heading_row}
33
33
  def body_rows
34
- rows.first.children.all?(&call(matches?: TableHeading)) ?
34
+ rows.first && rows.first.children.all?(&call(matches?: TableHeading)) ?
35
35
  rows[1..-1] :
36
36
  rows
37
37
  end
@@ -2,6 +2,6 @@
2
2
  module Infoboxer
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- PATCH = 1
5
+ PATCH = 2
6
6
  VERSION = [MAJOR, MINOR, PATCH].join('.')
7
7
  end