qipowl 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +11 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/.yardopts +3 -0
- data/Gemfile +17 -0
- data/LICENSE +20 -0
- data/README.md +345 -0
- data/Rakefile +21 -0
- data/bin/bowler +44 -0
- data/config/bowlers/cmd.yaml +3 -0
- data/config/bowlers/html.yaml +128 -0
- data/config/bowlers/html_supplemental.yaml +3 -0
- data/config/bowlers/markdown2html.yaml +23 -0
- data/extras/demo/main.rb +34 -0
- data/extras/demo/public/apple-touch-icon-114x114-precomposed.png +0 -0
- data/extras/demo/public/apple-touch-icon-144x144-precomposed.png +0 -0
- data/extras/demo/public/apple-touch-icon-57x57-precomposed.png +0 -0
- data/extras/demo/public/apple-touch-icon-72x72-precomposed.png +0 -0
- data/extras/demo/public/apple-touch-icon-precomposed.png +0 -0
- data/extras/demo/public/apple-touch-icon.png +0 -0
- data/extras/demo/public/css/bootstrap-theme.css +384 -0
- data/extras/demo/public/css/bootstrap-theme.min.css +1 -0
- data/extras/demo/public/css/bootstrap.css +6805 -0
- data/extras/demo/public/css/bootstrap.min.css +9 -0
- data/extras/demo/public/css/main.css +22 -0
- data/extras/demo/public/favicon.ico +0 -0
- data/extras/demo/public/fonts/glyphicons-halflings-regular.eot +0 -0
- data/extras/demo/public/fonts/glyphicons-halflings-regular.svg +228 -0
- data/extras/demo/public/fonts/glyphicons-halflings-regular.ttf +0 -0
- data/extras/demo/public/fonts/glyphicons-halflings-regular.woff +0 -0
- data/extras/demo/public/html.html +262 -0
- data/extras/demo/public/index.html +110 -0
- data/extras/demo/public/js/main.js +1 -0
- data/extras/demo/public/js/vendor/bootstrap.js +1999 -0
- data/extras/demo/public/js/vendor/bootstrap.min.js +6 -0
- data/extras/demo/public/js/vendor/jquery-1.10.1.min.js +6 -0
- data/extras/demo/public/js/vendor/modernizr-2.6.2-respond-1.1.0.min.js +11 -0
- data/extras/drafts/parsing.md +137 -0
- data/extras/support/typo +66 -0
- data/features/bowler.feature +8 -0
- data/features/html.feature +229 -0
- data/features/step_definitions/bowler_steps.rb +39 -0
- data/features/step_definitions/html_steps.rb +11 -0
- data/features/support/env.rb +7 -0
- data/images/owl-old.png +0 -0
- data/images/owl-old.xcf +0 -0
- data/images/owl.png +0 -0
- data/images/owl.xcf +0 -0
- data/lib/qipowl/bowlers/cmd.rb +26 -0
- data/lib/qipowl/bowlers/html.rb +409 -0
- data/lib/qipowl/bowlers/htmldoc.rb +268 -0
- data/lib/qipowl/bowlers/yaml.rb +63 -0
- data/lib/qipowl/core/bowler.rb +251 -0
- data/lib/qipowl/core/mapper.rb +92 -0
- data/lib/qipowl/core/monkeypatches.rb +168 -0
- data/lib/qipowl/core/ruler.rb +106 -0
- data/lib/qipowl/utils/hash_recursive_merge.rb +72 -0
- data/lib/qipowl/utils/logging.rb +14 -0
- data/lib/qipowl/version.rb +3 -0
- data/lib/qipowl.rb +50 -0
- data/qipowl.gemspec +42 -0
- data/qipowl.komodoproject +4 -0
- data/spec/bowler_spec.rb +11 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/string_spec.rb +32 -0
- data/spec/yaml_test.yaml +10 -0
- metadata +254 -0
@@ -0,0 +1,409 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'htmlbeautifier'
|
5
|
+
|
6
|
+
require_relative '../core/bowler'
|
7
|
+
require_relative '../bowlers/htmldoc'
|
8
|
+
|
9
|
+
module Qipowl
|
10
|
+
# Module placeholder for dynamically created bowlers
|
11
|
+
module Bowlers
|
12
|
+
class Html < Bowler
|
13
|
+
##############################################################################
|
14
|
+
### Default handlers for all the types of markup ###
|
15
|
+
##############################################################################
|
16
|
+
|
17
|
+
# Default handler for `:grip` (paired, inline) markup.
#
# The words are glued back into a single string which is cut at the first
# closing marker (the DSL symbol immediately followed by `∎`). The part in
# front of the marker is wrapped into the tag returned by `∃_grip_tag`.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] two items: the tagged text and whatever followed the
#   closing marker (`nil` when there was no marker)
def ∀_grip(*args)
  terminator = "#{__callee__}∎"
  inner, tail = [*args].join(SEPARATOR).split(terminator, 2)
  tag = ∃_grip_tag(__callee__)
  css = { :class => ∃_grip(__callee__)[:class] }
  [tagify(tag, css, inner), tail]
end
|
25
|
+
|
26
|
+
# Default handler for `:alone` (self-closing) markup.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the self-closed tag followed by the untouched words
def ∀_alone(*args)
  tag = ∃_alone_tag(__callee__)
  css = { :class => ∃_alone(__callee__)[:class] }
  [standalone(tag, css), args]
end
|
32
|
+
|
33
|
+
# Default handler for `:block` markup.
#
# @param [String] param the text placed on the same line as the opening
#   tag; when not blank it is used as the value of the `class` attribute,
#   otherwise the class comes from `∃_block`
# @param [Array] args the words, gained since last call to {#harvest};
#   they are passed through `hsub(String::HTML_ENTITIES)` before tagging
# @return whatever {#harvest} returns
def ∀_block(param, args)
  tag = ∃_block_tag(__callee__)
  requested = param.strip
  css_class = requested.empty? ? ∃_block(__callee__)[:class] : param.strip
  body = args.hsub(String::HTML_ENTITIES)
  harvest __callee__, tagify(tag, { :class => css_class }, body)
end
|
46
|
+
|
47
|
+
# Default handler for `:magnet` markup (a symbol that grabs the next word).
#
# The first word is unbowled, prefixed with the DSL symbol plus a
# non-breaking space and wrapped into the tag returned by `∃_magnet_tag`.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the tagged first word followed by the remaining words
def ∀_magnet(*args)
  head, *tail = args.flatten
  label = head.unbowl.to_s
  label.prepend("#{__callee__}#{String::NBSP}")
  tag = ∃_magnet_tag(__callee__)
  css = { :class => ∃_magnet(__callee__)[:class] }
  [tagify(tag, css, label), tail]
end
|
55
|
+
|
56
|
+
# Default handler for `:regular` (line-wide) markup.
#
# Tag and class are looked up by the canonical symbol (leading
# non-breaking spaces removed), while {#harvest} still receives the
# symbol exactly as it was called.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return whatever {#harvest} returns
def ∀_regular(*args)
  base = canonize(__callee__)
  tag = ∃_regular_tag(base)
  css = { :class => ∃_regular(canonize(__callee__))[:class] }
  harvest __callee__, tagify(tag, css, args)
end
|
66
|
+
|
67
|
+
##############################################################################
|
68
|
+
### Grip :: Specific handlers ###
|
69
|
+
##############################################################################
|
70
|
+
# Handler for abbreviations.
#
# The first word is the term; the following words, up to the closing
# marker (the DSL symbol followed by `∎`), become its `title` attribute.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the tagged term and the text after the closing marker
def †(*args)
  words = args.flatten
  term = words.first
  explanation, tail = words.drop(1).join(SEPARATOR).split("#{__callee__}∎", 2)
  tag = ∃_grip_tag(__callee__)
  attrs = { :title => explanation, :class => ∃_grip(__callee__)[:class] }
  [tagify(tag, attrs, term), tail]
end
|
78
|
+
|
79
|
+
# Handler for anchors.
#
# The first word is the target; the following words, up to the closing
# marker, are the link text. Targets that {#get_href_content} reports as
# `:img` are rendered as an inline `<img>` (link text becomes `alt`),
# everything else as the tag returned by `∃_grip_tag` with an `href`.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the produced markup and the text after the closing marker
def ⚓(*args)
  words = args.flatten
  caption, tail = words.drop(1).join(SEPARATOR).split("#{__callee__}∎", 2)
  target = words.first.unbowl
  markup =
    if get_href_content(target) == :img
      standalone(:img, { :src => target, :alt => [*caption].join(SEPARATOR), :class => 'inplace' })
    else
      tagify(∃_grip_tag(__callee__), { :href => target }, caption)
    end
  [markup, tail]
end
|
95
|
+
|
96
|
+
##############################################################################
|
97
|
+
### Alone :: Specific handlers ###
|
98
|
+
##############################################################################
|
99
|
+
# `:alone` handler for the horizontal rule. Unlike the default handler it
# first harvests the words around the rule as an orphan paragraph.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return whatever the final {#harvest} call returns
def ——(*args)
  if !args.vacant?
    harvest(nil, orphan(args.join(SEPARATOR)))
  end
  rule = standalone(∃_alone_tag(__callee__))
  harvest(__callee__, rule)
end
|
107
|
+
|
108
|
+
##############################################################################
|
109
|
+
### Block :: Specific handlers ###
|
110
|
+
##############################################################################
|
111
|
+
# `:block` handler for comments: the commented text is dropped entirely.
#
# @param [Array] args ignored
# @return [Array] always an empty array
def ✍(*args)
  Array.new
end
|
119
|
+
|
120
|
+
##############################################################################
|
121
|
+
### Magnet :: Specific handlers ###
|
122
|
+
##############################################################################
|
123
|
+
# `:magnet` handler for a reference to a Livejournal user.
#
# @param [Array] args the words, gained since last call to {#harvest};
#   the first one (unbowled) is the user name
# @return [Array] the html snippet linking to the user's profile and
#   journal, followed by the remaining words
def ✎(*args)
  words = args.flatten
  user = words.first.unbowl
  journal = "http://#{user}.livejournal.com"
  badge = "<img src='http://l-stat.livejournal.com/img/userinfo.gif' alt='[info]' " \
          "style='border: 0pt none ; vertical-align: bottom; padding-right: 1px;' " \
          "height='17' width='17'>"
  markup = "<span style='white-space: nowrap;'>" \
           "<a href='#{journal}/profile?mode=full'>#{badge}</a>" \
           "<a href='#{journal}/?style=mine'><b>#{user}</b></a>" \
           "</span>"
  [markup, words.drop(1)]
end
|
133
|
+
|
134
|
+
# `:magnet` handler producing a named element: the first word (unbowled)
# becomes the `name` attribute and the element body is a zero-width space.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the produced tag followed by the remaining words
def ☇(*args)
  anchor, *tail = args.flatten
  tag = ∃_magnet_tag(__callee__)
  attrs = { :name => anchor.unbowl }
  [tagify(tag, attrs, String::ZERO_WIDTH_SPACE), tail]
end
|
138
|
+
|
139
|
+
##############################################################################
|
140
|
+
### Regular :: Specific handlers ###
|
141
|
+
##############################################################################
|
142
|
+
# Handler for an embedded Youtube video.
#
# The first word is the video id; any further words are harvested first,
# as an orphan paragraph, and the `<iframe>` is harvested after them.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return whatever the final {#harvest} call returns
def ✇ *args
  id, *rest = args.flatten
  harvest nil, orphan(rest.join(SEPARATOR)) unless rest.vacant?
  # NOTE(review): everything between `%Q(` and `)` (line breaks and any
  # leading whitespace included) is emitted verbatim — do not re-indent.
  harvest __callee__, %Q(
<iframe class='youtube' width='560' height='315' src='http://www.youtube.com/embed/#{id.unbowl}'
frameborder='0' allowfullscreen></iframe>
)
end
|
153
|
+
|
154
|
+
# Handler for standalone pictures with a caption.
#
# The first word (unbowled) is the image source; the remaining words are
# joined into the caption paragraph of the `<figure>`.
#
# @todo Make it to understand quotes when there is a plain HTML on the other side
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return whatever {#harvest} returns
def ⚘ *args
  href, *title = args.flatten
  # NOTE(review): everything between `%Q(` and `)` (line breaks and any
  # leading whitespace included) is emitted verbatim — do not re-indent.
  harvest __callee__, %Q(
<figure>
<img src='#{href.unbowl}'/>
<figcaption>
<p>
#{title.join(SEPARATOR)}
</p>
</figcaption>
</figure>
)
end
|
172
|
+
|
173
|
+
# `:regular` handler for data lists (required since data list items
# consist of two tags: `dt` and `dd`.)
#
# The joined words are split on an em dash surrounded by whitespace: the
# part before it becomes the `dt`, the part after it the `dd`. With more
# than one such dash only the first two parts are used.
#
# @param [Array] args the words, gained since last call to {#harvest}
# @return whatever {#harvest} returns
def ▶ *args
  dt, dd = args.join(SEPARATOR).split(/\s+(?:—)\s+/)
  # NOTE(review): everything between `%Q(` and `)` (line breaks and any
  # leading whitespace included) is emitted verbatim — do not re-indent.
  harvest __callee__, %Q(
#{tagify :dt, {}, dt}
#{tagify :dd, {}, dd}
)
end
|
184
|
+
# Alias for {#▶}, according to YAML rules specifies additional
|
185
|
+
# class for the data list `<dl>` tag behind (`dl-horizontal`.)
|
186
|
+
alias_method :▷, :▶
|
187
|
+
|
188
|
+
protected
|
189
|
+
# Computes the nesting level of a `:linewide` element by counting the
# non-breakable spaces preceding its DSL symbol. For instance, nested
# lists are produced by prepending `"\u{00A0}"` to the line item DSL tag:
#
#     li = "• li1 \u{00A0}• nested 1 \u{00A0}• nested 2 • li2"
#
# @param [Symbol|String] callee the DSL symbol to get the level information for.
# @return [Integer] the position of the first character that is not a
#   non-breakable space, or `-1` when nothing but such spaces is given
#   (this includes `nil` and the empty string).
def level(callee)
  name = callee.to_s
  return -1 if name.gsub(/#{String::NBSP}/, '').empty?

  name.each_char.find_index { |char| char != String::NBSP }
end
|
203
|
+
|
204
|
+
# Strips the leading non-breakable spaces (the nesting prefix) off a DSL
# symbol.
#
# @param [Symbol|String] callee the DSL symbol, possibly prefixed
# @return [Symbol, nil] the bare symbol; `nil` when `callee` is `nil` or `false`
def canonize(callee)
  return unless callee

  bare = callee.to_s.gsub(/^#{String::NBSP}*/, '')
  bare.to_sym
end
|
207
|
+
|
208
|
+
# @see Qipowl::Bowler#harvest
#
# Additionally it checks if there was a `:linewide` item, requiring
# surrounding html element (like `<ul>` around several `<li>`s.)
#
# The extra work only happens when the callee changes (or is `nil`):
#
# 1. if the new callee has enclosures and is nested deeper than the
#    previous one, a marker (`i.␚ify`) is appended to `str` for every
#    level in between;
# 2. if the previous callee has enclosures and was nested deeper than the
#    new one, then for every level in between the enclosing opening tag is
#    put in front of the last yielded string and that level's markers are
#    replaced with the closing tag in all yielded strings.
#
# @param [Symbol] callee of method
# @param [String] str to be harvested
# @return whatever the parent implementation returns
def harvest callee, str
  if callee.nil? || callee != @callee
    # Going deeper: leave one marker per newly entered level.
    level(callee).downto(level(@callee) + 1) { |i|
      str += i.␚ify
    } unless ∃_enclosures(canonize(callee)).nil?

    # Going back up: wrap what was yielded so far into the enclosing tag.
    if prev = ∃_enclosures(canonize(@callee))
      level(@callee).downto(level(callee) + 1) { |i|
        # `sub!(/\A/, …)` prepends in place, so the object stored in @yielded changes.
        @yielded.last.sub!(/\A/, opening(prev[:tag], {:class => prev[:class]}))
        @yielded.each { |s| s.gsub!(/#{i.␚ify}/) { closing(prev[:tag]) } }
      }
    end

    @callee = callee
  end
  super callee, str
end
|
232
|
+
|
233
|
+
private
|
234
|
+
# Handles calls to the _methods_ that start with nesting markers (leading
# non-breakable spaces), since the DSL cannot be declared for them up
# front.
#
# When `method` is a nested form of a known regular tag, it gets aliased
# to the canonical handler on the class and is then invoked; the words
# array and the block are handed over as two positional arguments.
# Otherwise the call is not handled and the symbol is returned together
# with its arguments.
#
# @param [Symbol] method as specified by caller (`method_missing`.)
# @param [Array] args as specified by caller (`method_missing`.)
# @param [Proc] block as specified by caller (`method_missing`.)
#
# @return [Array] the array of words
def special_handler(method, *args, &block)
  # Sublevel markers, e.g. “ •” is level 2 line-item
  nested = level(method) > 0
  known = nested && self.class::REGULAR_TAGS.keys.include?(canonize(method))
  return [method, args].flatten unless known

  self.class.class_eval "alias_method :#{method}, :#{canonize(method)}"
  send method, args, block
end
|
250
|
+
|
251
|
+
# Produces html paragraph tag (`<p>`, without attributes) around the text.
# @see Qipowl::Bowler#orphan
# @param str the words, to be put in paragraph tag (stringified and
#   stripped first).
# @return [String] tagged words.
def orphan(str)
  paragraph = str.to_s.strip
  tagify(:p, {}, paragraph).to_s
end
|
258
|
+
# Constructs opening html tag for the input given.
#
# To construct `abbr` tag with `title` _Title_ and class _default_:
#
#     opening :abbr, { :title=>'Title', :class=>'default' }
#
# Attributes whose value is `nil` are skipped. Every attribute is
# prepended to the ones already collected, so they come out in reverse
# order of the hash. Values are wrapped in single quotes, hence a single
# quote inside a value is emitted as `&#39;` to keep the tag well-formed.
#
# @param [String] tag to produce opening tag string from.
# @param [Hash] params to be put into opening tag as attributes.
# @return [String] opening tag for the input given.
def opening tag, params={}
  attrs = params.inject("") do |memo, (name, value)|
    unless value.nil?
      escaped = value.to_s.gsub("'", '&#39;')
      memo.prepend " #{name}='#{escaped}'"
    end
    memo
  end
  "<#{tag}#{attrs}>"
end
|
271
|
+
|
272
|
+
# Constructs closing html tag for the input given.
#
# @param [String] tag to produce closing tag string from.
# @return [String] closing tag for the input given.
def closing(tag)
  '</' + tag.to_s + '>'
end
|
279
|
+
|
280
|
+
# (see opening)
# Acts most like an {#opening} method, but closes an element inplace
# (used for `hr`, `br`, `img`).
#
# Only the final `>` of the opening tag is turned into `/>`, so a `>`
# inside an attribute value (e.g. an `alt` text) is left untouched.
def standalone tag, params={}
  opening(tag, params).sub(/>\z/, '/>')
end
|
286
|
+
# Constructs valid tag for the input given, concatenating
# opening and closing tags around the text passed in `args`.
#
# @param [String] tag to produce html tag string from.
# @param [Hash] params to be put into opening tag as attributes.
# @param [Array] args the words, to be tagged around.
# @return [String] the complete element, or an empty string when the
#   joined text is vacant.
def tagify(tag, params, *args)
  text = [*args].join(SEPARATOR)
  return '' if text.vacant?

  "#{opening(tag, params)}#{text}#{closing(tag)}"
end
|
297
|
+
|
298
|
+
|
299
|
+
# Determines content of remote link by href.
#
# No network request is made: the decision is taken by looking at the
# link only. A link is an image when it ends with one of the known
# picture extensions (in any letter case) or points to `//i.chzbgr…`.
#
# TODO Make image patterns configurable.
# @param [String] href link to remote resource
# @return [Symbol] content type (`:img` or `:text` currently)
def get_href_content href
  href = href.to_s.unbowl.strip
  # Downcasing makes `.PNG`, `.Png`, `.JpG` … all match.
  by_extension = href.downcase.end_with?(*%w{png jpg jpeg gif})
  by_host = /\/\/i\.chzbgr/ =~ href
  by_extension || by_host ? :img : :text

  # Disabled implementation asking the server for the content type:
  #
  #  uri = URI(href.to_s.unbowl)
  #  Net::HTTP.start(uri.host, uri.port) do |http|
  #    http.open_timeout = 1
  #    http.read_timeout = 1
  #
  #    request = Net::HTTP::Head.new uri
  #    response = http.request request
  #    case response.to_hash["content-type"].first
  #    when /image/ then return :img
  #    when /text/ then return :text
  #    end
  #  end
  #  :unknown
  #rescue
  #  logger.warn "Unable to determine link [#{href.to_s.unbowl}] type: no internet connection. Reverting to default."
  #  :unknown
end
|
330
|
+
|
331
|
+
end
|
332
|
+
end
|
333
|
+
end
|
334
|
+
=begin
|
335
|
+
# Markup processor for Html output.
|
336
|
+
#
|
337
|
+
# This class produces HTML from markup as Markdown does.
|
338
|
+
|
339
|
+
# Amount of unnamed instances of the class (needed for new class name generation)
|
340
|
+
@@inst_count = 0
|
341
|
+
|
342
|
+
|
343
|
+
# `:handshake` default handler
|
344
|
+
# @param [String] from packed as string operand “before”
|
345
|
+
# @param [String] till packed as string operand “after”
|
346
|
+
# @return
|
347
|
+
def ∈ *args
|
348
|
+
from, till, *rest = args.flatten
|
349
|
+
tag = @mapping.handshake(__callee__)
|
350
|
+
tag = tag[:tag] if Hash === tag
|
351
|
+
[tagify(tag, {}, "#{from.unbowl}#{__callee__}#{till.unbowl}".gsub(String::SYMBOL_FOR_SPACE, ' ')), rest]
|
352
|
+
end
|
353
|
+
alias_method :⊂, :∈
|
354
|
+
|
355
|
+
|
356
|
+
|
357
|
+
# @see {Qipowl::Bowler#defreeze}
|
358
|
+
#
|
359
|
+
# Additionally it checks if tag is a `:block` tag and
|
360
|
+
# substitutes all the carriage returns (`$/`) with special symbol
|
361
|
+
# {String::CARRIAGE_RETURN} to prevent format damage.
|
362
|
+
#
|
363
|
+
# @param [String] str to be defreezed
|
364
|
+
def defreeze str
|
365
|
+
str = super str
|
366
|
+
@mapping[:block].each { |tag, htmltag|
|
367
|
+
str.gsub!(/(#{tag})(.*?)$(.*?)(#{tag}|\Z)/m) { |m|
|
368
|
+
"#{$1}('#{$2}', '#{$3}')\n\n"
|
369
|
+
}
|
370
|
+
}
|
371
|
+
str
|
372
|
+
end
|
373
|
+
|
374
|
+
# @see {Qipowl::Bowler#serveup}
|
375
|
+
#
|
376
|
+
# Additionally it beautifies the output HTML
|
377
|
+
#
|
378
|
+
# @param [String] str to be roasted
|
379
|
+
def serveup str
|
380
|
+
result = ''
|
381
|
+
%w(. , : ; ! ? »).map(&:bowl).each { |punct|
|
382
|
+
str.gsub!(/(?:\p{Space}|#{String::CARRIAGE_RETURN})*(#{punct})/, '\1')
|
383
|
+
# str.gsub!(/(#{punct})(?=\p{Alnum})/, '\1 ')
|
384
|
+
}
|
385
|
+
%w(«).map(&:bowl).each { |punct|
|
386
|
+
str.gsub!(/(#{punct})(?:\p{Space}|#{String::CARRIAGE_RETURN})*/, '\1')
|
387
|
+
str.gsub!(/(?<=\p{Alnum})(#{punct})/, ' \1')
|
388
|
+
}
|
389
|
+
served = super(str)
|
390
|
+
begin
|
391
|
+
HtmlBeautifier::Beautifier.new(result).scan(served)
|
392
|
+
rescue
|
393
|
+
logger.error "Was unable to tidyfy resulting HTML. Returning as is."
|
394
|
+
result = served
|
395
|
+
end
|
396
|
+
result
|
397
|
+
end
|
398
|
+
|
399
|
+
end
|
400
|
+
|
401
|
+
if __FILE__ == $0
|
402
|
+
|
403
|
+
i = 0
|
404
|
+
Dir.glob("#{File.dirname(__FILE__)}/../../../data/octopress-site/source/_posts/**/*.owl").each {|f|
|
405
|
+
puts "Processing ##{i += 1}: #{f}"
|
406
|
+
Qipowl::Html.parse File.read(f)
|
407
|
+
}
|
408
|
+
end
|
409
|
+
=end
|
@@ -0,0 +1,268 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'yaml'
|
6
|
+
require_relative '../core/monkeypatches.rb'
|
7
|
+
require_relative '../utils/hash_recursive_merge.rb'
|
8
|
+
|
9
|
+
module Qipowl
|
10
|
+
|
11
|
+
class HtmlDoc < Nokogiri::XML::SAX::Document
|
12
|
+
attr_reader :qp, :tags
|
13
|
+
# Prepares an empty converter state.
#
# @param mapping stored as is in `@mapping`
def initialize(mapping)
  @mapping, @inside = mapping, nil
  @collector, @qp = {}, ''
  # Custom tags discovered while parsing, exposed through the `tags` reader.
  @tags = { :inplace => {}, :linewide => {} }
  @level = 0
end
|
21
|
+
|
22
|
+
# SAX callback for an opening tag: appends the markup corresponding to
# the tag to `@qp`.
#
# Besides producing text, some tags leave state behind for {#characters}
# and {#end_element}: `@inside` remembers the enclosing list/link/term
# and `@collector` keeps attributes (`href`, `name`, `title`) as well as
# the custom symbols generated for classed `p`/`div`/`span`/`sup`, which
# are also registered in `@tags`.
#
# @param [String] name the tag name
# @param [Array] attributes the attributes as `[name, value]` pairs
# @raise [RuntimeError] when the tag is not known to the converter
def start_element name, attributes = []
  current_attrs = Hash[attributes]
  tag = name.to_sym

  @qp += case tag
         when :p, :div
           if current_attrs['class']
             @collector[tag] = "✿_#{tag}_#{current_attrs['class'].gsub(/\s+/, '_')}".to_sym
             @tags[:linewide][@collector[tag]] = "#{tag}†#{current_attrs['class'].gsub(/\s+/, '†')}".to_sym
             "\n\n#{@collector[tag]} "
           else
             "\n\n"
           end
         when :ul, :ol, :table, :dl
           @inside = tag
           "\n"
         when :pre then "\n\nΛ\n"
         when :tr then " ┇ "
         when :td then " ┆ "
         when :a
           @inside = :a
           @collector[:href] = current_attrs['href']
           @collector[:name] = current_attrs['name']
           ''
         when :li then (@inside == :ol) ? "◦ " : "• "
         when :b, :strong then "≡"
         when :i, :em, :nobr then "≈"
         when :strike, :del, :s then "─"
         when :small then "↓"
         when :u then "▁"
         when :code, :tt then "λ"
         when :dfn, :abbr, :cite
           @inside = tag
           @collector[:title] = current_attrs['title']
           # Nothing is emitted here: the markup is produced by #end_element.
           # Without this the branch would evaluate to the title itself
           # (emitted twice) or to nil (`@qp += nil` raises TypeError).
           ''
         when :hr then "\n\n——\n\n"
         when :br then " ⏎\n"
         when :center then "\n— "
         when :dt then "▷ "
         when :dd then " — "
         when :h1 then "§1 "
         when :h2 then "§2 "
         when :h3 then "§3 "
         when :h4 then "§4 "
         when :h5 then "§5 "
         when :h6 then "§6 "
         when :blockquote then "\n\n〉 "
         when :figure
           @inside = :figure
           "\n\n"
         when :figcaption then " "
         when :img then fix_href(current_attrs['src'])
         when :span, :sup
           if current_attrs['class'].nil?
             ''
           else
             @collector[tag] = "✿_span_#{current_attrs['class'].gsub(/\s+/, '_')}".to_sym
             @tags[:inplace][@collector[tag]] = "span†#{current_attrs['class'].gsub(/\s+/, '†')}".to_sym
             " #{@collector[tag]}"
           end
         when :embed, :iframe then "\n\n#{current_attrs['src']}\n\n"
         when :html, :body, :object, :param, :thead, :tbody, :font, :'lj-embed', :'lj-cut'
           ''
         else
           raise "=== Unhandled: #{name} with attrs: [#{current_attrs}]"
         end
end
|
88
|
+
|
89
|
+
# SAX callback for text content.
#
# Inside a link, `dfn` or `abbr` the text is collected for {#end_element};
# everywhere else it goes straight to the output. The parser may deliver
# one piece of text in several calls, so collected text is accumulated
# rather than overwritten ({#end_element} removes it from the collector).
#
# @param [String] str the chunk of text
def characters str
  case @inside
  when :a, :dfn, :abbr
    @collector[:text] = "#{@collector[:text]}#{str}"
  else
    @qp += str
  end
end
|
97
|
+
|
98
|
+
# SAX callback for a closing tag: appends the closing markup to `@qp`
# and clears the state left by {#start_element}.
#
# @param [String] name the tag name
def end_element name
  tag = name.to_sym

  @qp += case tag
         when :p, :div
           @collector.delete(tag)
           "\n\n"
         when :a
           @inside = nil
           if (href = @collector.delete(:href))
             # A link: text (spaces made non-breaking), marker, target.
             " #{(@collector.delete(:text) || '').gsub(/\s+/, "\u{00A0}")}¹#{fix_href href} "
           else
             # No href: emitted as a named anchor.
             "☇ #{@collector.delete(:name)} #{@collector.delete(:text)}"
           end
         when :dfn, :abbr, :cite
           @inside = nil
           # Both entries are always removed, so nothing stale survives;
           # without collected text nothing is emitted.
           text = @collector.delete(:text)
           title = @collector.delete(:title)
           text ? " #{text.gsub(/\s+/, "\u{00A0}")}†#{title}† " : ''
         when :ul, :ol, :table, :dl
           @inside = nil
           "\n"
         when :li then "\n"
         when :pre then "\nΛ\n\n"
         when :b, :strong then "≡"
         when :i, :em, :nobr then "≈"
         when :u then "▁"
         when :dd then "\n"
         when :strike, :del, :s then "─"
         when :small then "↓"
         when :code, :tt then "λ"
         when :span, :sup
           "#{@collector.delete(tag)} "
         when :h1, :h2, :h3, :h4, :h5, :h6 then "\n\n"
         when :blockquote then "\n\n"
         when :figure
           @inside = nil
           "\n\n"
         else
           ''
         end
end
|
135
|
+
|
136
|
+
private
|
137
|
+
# Turns a site-relative link into an absolute one.
#
# @param [String, nil] href the link as found in the document
# @param [String] site the prefix for relative links
# @return [String] `href` itself when it starts with `http`, otherwise
#   `site` followed by `href` without its leading slashes; an empty
#   string when there is no `href` at all (e.g. `<img>` without `src`)
def fix_href href, site = 'http://mudasobwa.ru/'
  return '' if href.nil?
  return href if href.start_with?('http')

  "#{site}#{href.sub(/\A\/+/, '')}"
end
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
if __FILE__ == $0
|
145
|
+
|
146
|
+
# Cleans up legacy post html before it is handed to the SAX parser.
# The substitutions are applied in the order listed.
#
# @param [String] str raw html of a post
# @return [String] the cleaned up html
def prepare str
  str.gsub(/&[nm]dash;/, '—')                                   # dash entities
     .gsub(/&nbsp;|\u00A0/, ' ')                                 # non-breaking spaces
     .gsub(/\s+--\s+/, ' — ')                                    # ascii dashes
     .gsub(/^\s*/, '')                                           # leading spaces
     .gsub(/<img src="\/i\/>/, '')                               # broken image stubs
     .gsub(/&trade;/, '™')                                       # other entities
     .gsub(/&copy;/, '©')                                        # other entities
     .gsub(/(1st@1stone.ru|am@secondiary.ru)/, 'am@mudasobwa.ru') # obsolete e-mails
     .gsub(/http:\/\/(www\.)?(secondiary|1stone|matiouchkine.net)\.ru/, 'http://mudasobwa.ru') # obsolete site name
     .gsub(/\[(http[^\]]*)\]/, '\1')                             # obsolete markdown pics
     # NOTE(review): the pattern below has no capture group, so `\1` is
     # always empty and the result is always `λλ` — confirm the intent.
     .gsub(/<span>\s*<\/span>/, 'λ\1λ')
     .gsub(/<lj (?:comm|user)="(.*?)">/, '✎ \1')                 # livejournal user tags
     .gsub(/<([^<>]*?@[^<>]*?)>/, '\1')                          # e-mails in angle brackets
     .gsub(/<imgsrc=/, '<img src=')                              # glued tag and attribute
     .gsub(/<ahref=/, '<a href=')                                # glued tag and attribute
     .gsub(/<\/p>\s*<p>\s*—/, " ⏎\n—")                           # direct speech
     .gsub(/<br(?:\s*\/?\s*)>\s*<br(?:\s*\/?\s*)>/, "\n\n")      # old-fashioned carriage
     .gsub(/<[!]--[^<>]*?-->/, '')                               # comments
  # .gsub(/([\.,:;!?])(?=\S)/, '\1 ') # fix punctuation
end
|
167
|
+
|
168
|
+
# Tidies up converted text: every run of two or more line breaks is
# squeezed into one empty line, then whitespace and `⏎` marks are removed
# from both the very beginning and the very end.
#
# @param [String] str the converted text
# @return [String] the tidied text
def postpare str
  squeezed = str.gsub(/\R{2,}/, "\n\n")
  without_head = squeezed.gsub(/\A(\s|⏎)*/, '')
  without_head.gsub(/(\s|⏎)*\Z/, '')
end
|
173
|
+
|
174
|
+
# One-off conversion: reads the legacy posts from `posts.csv` (fields are
# separated by `☢`), converts the html of every post and writes one
# `.owl` file per post into `data/site`, followed by `rules.yaml` with
# all the custom tags met on the way.

# Predefined custom tags; the ones discovered while parsing are merged in.
tags = {
  :magnet => {:✎ => :lj, :☇ => :a},
  :inplace => {:▁ => :u, :─ => :del},
  :linewide => {:☛ => :twit},
  :block => {:✁ => :cut}
}
file = "#{File.dirname(__FILE__)}/../../../data/internals/posts.csv"
file_errors = "#{File.dirname(__FILE__)}/../../../data/internals/errors.txt"
FileUtils.rm file_errors if File.exist? file_errors

# NOTE(review): `mkdir` raises when the directory already exists, so a
# second run needs `data/site` to be removed first — confirm this is intended.
FileUtils.mkdir("#{File.dirname(__FILE__)}/../../../data/site")
# %w{txt pic ref twt}.each {|d| FileUtils.mkdir("#{File.dirname(__FILE__)}/../../../data/site/#{d}")}

puts "Reading #{file} …"
File.readlines(file).each { |l|
  # Fields used below: 0 id, 1 title, 2 body html, 3 date, 4 image,
  # 5 quote html, 6 quote url, 7 post type.
  data = l.split('☢')
  puts "Processing record #{data[0]}"
  begin
    # Convert the post body.
    html_doc = Qipowl::HtmlDoc.new nil
    parser = Nokogiri::HTML::SAX::Parser.new(html_doc)
    parser.parse(prepare data[2])
    tags.rmerge! html_doc.tags
    body = postpare(html_doc.qp)

    body = body.strip if body

    id = data[0]
    title = data[1].gsub(/'/, "’")
    date = data[3]
    img = data[4]

    # Image names that are not absolute links live under /i/ on the site.
    if img && !img.empty? && !img.start_with?('http://')
      img = "http://mudasobwa.ru/i/#{img.gsub(/\A\/+/, '')}"
    end

    # Convert the quote with a converter of its own.
    q_doc = Qipowl::HtmlDoc.new nil
    q_parser = Nokogiri::HTML::SAX::Parser.new(q_doc)
    q_parser.parse(prepare data[5])
    tags.rmerge! q_doc.tags
    quote = postpare(q_doc.qp)

    q_url = data[6]
    type = data[7].to_i # 1 => text, 2 => image, 3 => quote, 4 => twit
    stype = case type
            when 1 then :txt
            when 2 then :pic
            when 3 then :ref
            when 4 then :twt
            else :txt
            end

    # Front matter; the literal is emitted verbatim, hence flush left.
    owl_text = %Q(---
title: '#{title}'
id: #{id}
date: '#{date}'
categories: [#{stype}]
---

)
    # owl_text << (type == 4 ? "☛ " : "§1 ")
    # owl_text << title
    # owl_text << "\n\n"

    owl_text << case type
                when 2
                  "#{img} #{body.gsub(/\R/, ' ⏎ ')}"
                when 3
                  # Last two labels of the quoted host; nil when that cannot be worked out.
                  q_ref = q_url[/http:\/\/(.*?)\/|\Z/, 1].split('.').last(2).join('.') rescue nil
                  "\n〉 #{quote.strip}\n‒ #{q_ref ? q_ref : q_url}, #{q_url}\n\n#{body}"
                else body
                end

    # File name is date + title; on a clash the first free numeric suffix
    # (1..100) is appended.
    # NOTE(review): when all hundred suffixes are taken, `each` returns the
    # range itself and `fname` becomes `1..100` — confirm this cannot happen.
    fname = "#{date.split.first}-#{title.to_filename}.owl"
    fname = (1..100).each {|i|
      break "#{date.split.first}-#{title.to_filename}-#{i}.owl" \
        unless File.exist?("#{File.dirname(__FILE__)}/../../../data/site/#{date.split.first}-#{title.to_filename}-#{i}.owl")
    } if File.exist?("#{File.dirname(__FILE__)}/../../../data/site/#{fname}")
    File.open("#{File.dirname(__FILE__)}/../../../data/site/#{fname}", 'a') { |f| f.write(owl_text) }

  rescue Exception => e
    # Dump both html sources of the failed record, then abort the run.
    puts '—'*40
    puts 'Error occured'
    puts prepare(data[2])
    puts '—'*40
    puts prepare(data[5])
    puts '—'*40
    raise e
  end
}

# Opened in append mode: an existing rules.yaml is extended, not replaced.
File.open("#{File.dirname(__FILE__)}/../../../data/site/rules.yaml", 'a') { |f|
  f.write(tags.to_yaml)
}
|
267
|
+
|
268
|
+
end
|