qipowl 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +7 -0
  2. data/.document +11 -0
  3. data/.gitignore +17 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +3 -0
  6. data/.yardopts +3 -0
  7. data/Gemfile +17 -0
  8. data/LICENSE +20 -0
  9. data/README.md +345 -0
  10. data/Rakefile +21 -0
  11. data/bin/bowler +44 -0
  12. data/config/bowlers/cmd.yaml +3 -0
  13. data/config/bowlers/html.yaml +128 -0
  14. data/config/bowlers/html_supplemental.yaml +3 -0
  15. data/config/bowlers/markdown2html.yaml +23 -0
  16. data/extras/demo/main.rb +34 -0
  17. data/extras/demo/public/apple-touch-icon-114x114-precomposed.png +0 -0
  18. data/extras/demo/public/apple-touch-icon-144x144-precomposed.png +0 -0
  19. data/extras/demo/public/apple-touch-icon-57x57-precomposed.png +0 -0
  20. data/extras/demo/public/apple-touch-icon-72x72-precomposed.png +0 -0
  21. data/extras/demo/public/apple-touch-icon-precomposed.png +0 -0
  22. data/extras/demo/public/apple-touch-icon.png +0 -0
  23. data/extras/demo/public/css/bootstrap-theme.css +384 -0
  24. data/extras/demo/public/css/bootstrap-theme.min.css +1 -0
  25. data/extras/demo/public/css/bootstrap.css +6805 -0
  26. data/extras/demo/public/css/bootstrap.min.css +9 -0
  27. data/extras/demo/public/css/main.css +22 -0
  28. data/extras/demo/public/favicon.ico +0 -0
  29. data/extras/demo/public/fonts/glyphicons-halflings-regular.eot +0 -0
  30. data/extras/demo/public/fonts/glyphicons-halflings-regular.svg +228 -0
  31. data/extras/demo/public/fonts/glyphicons-halflings-regular.ttf +0 -0
  32. data/extras/demo/public/fonts/glyphicons-halflings-regular.woff +0 -0
  33. data/extras/demo/public/html.html +262 -0
  34. data/extras/demo/public/index.html +110 -0
  35. data/extras/demo/public/js/main.js +1 -0
  36. data/extras/demo/public/js/vendor/bootstrap.js +1999 -0
  37. data/extras/demo/public/js/vendor/bootstrap.min.js +6 -0
  38. data/extras/demo/public/js/vendor/jquery-1.10.1.min.js +6 -0
  39. data/extras/demo/public/js/vendor/modernizr-2.6.2-respond-1.1.0.min.js +11 -0
  40. data/extras/drafts/parsing.md +137 -0
  41. data/extras/support/typo +66 -0
  42. data/features/bowler.feature +8 -0
  43. data/features/html.feature +229 -0
  44. data/features/step_definitions/bowler_steps.rb +39 -0
  45. data/features/step_definitions/html_steps.rb +11 -0
  46. data/features/support/env.rb +7 -0
  47. data/images/owl-old.png +0 -0
  48. data/images/owl-old.xcf +0 -0
  49. data/images/owl.png +0 -0
  50. data/images/owl.xcf +0 -0
  51. data/lib/qipowl/bowlers/cmd.rb +26 -0
  52. data/lib/qipowl/bowlers/html.rb +409 -0
  53. data/lib/qipowl/bowlers/htmldoc.rb +268 -0
  54. data/lib/qipowl/bowlers/yaml.rb +63 -0
  55. data/lib/qipowl/core/bowler.rb +251 -0
  56. data/lib/qipowl/core/mapper.rb +92 -0
  57. data/lib/qipowl/core/monkeypatches.rb +168 -0
  58. data/lib/qipowl/core/ruler.rb +106 -0
  59. data/lib/qipowl/utils/hash_recursive_merge.rb +72 -0
  60. data/lib/qipowl/utils/logging.rb +14 -0
  61. data/lib/qipowl/version.rb +3 -0
  62. data/lib/qipowl.rb +50 -0
  63. data/qipowl.gemspec +42 -0
  64. data/qipowl.komodoproject +4 -0
  65. data/spec/bowler_spec.rb +11 -0
  66. data/spec/spec_helper.rb +15 -0
  67. data/spec/string_spec.rb +32 -0
  68. data/spec/yaml_test.yaml +10 -0
  69. metadata +254 -0
@@ -0,0 +1,409 @@
1
+ # encoding: utf-8
2
+
3
+ require 'net/http'
4
+ require 'htmlbeautifier'
5
+
6
+ require_relative '../core/bowler'
7
+ require_relative '../bowlers/htmldoc'
8
+
9
+ module Qipowl
10
+ # Module placeholder for dynamically created bowlers
11
+ module Bowlers
12
+ class Html < Bowler
13
+ ##############################################################################
14
+ ### Default handlers for all the types of markup ###
15
+ ##############################################################################
16
+
17
# Default handler for `:grip` markup.
#
# Joins the words and cuts them at the closing marker (the callee name
# followed by `∎`); the part before the marker gets tagged.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the tagged chunk followed by the remainder after the marker
def ∀_grip *args
  closing_marker = "#{__callee__}∎"
  mine, rest = args.join(SEPARATOR).split(closing_marker, 2)
  html_tag = ∃_grip_tag(__callee__)
  css = ∃_grip(__callee__)[:class]
  [tagify(html_tag, { :class => css }, mine), rest]
end
25
+
26
# Default handler for `:alone` markup: emits a self-closed element and
# leaves the words untouched.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the standalone tag followed by the array of words
def ∀_alone *args
  html_tag = ∃_alone_tag(__callee__)
  css = ∃_alone(__callee__)[:class]
  [standalone(html_tag, { :class => css }), args]
end
32
+
33
# Default handler for `:block` markup.
#
# @param [String] param the text placed on the same line as the opening
#   tag; when it is not blank it is used as the class of the element
#   instead of the default one
# @param [Array] args the words, gained since last call to {#harvest};
#   they are passed through `hsub(String::HTML_ENTITIES)` before tagging
# @return the result of {#harvest}
def ∀_block param, args
  html_tag = ∃_block_tag(__callee__)
  requested = param.strip
  css = requested.empty? ? ∃_block(__callee__)[:class] : requested
  harvest __callee__, tagify(html_tag, { :class => css }, args.hsub(String::HTML_ENTITIES))
end
46
+
47
# Default handler for `:magnet` markup: only the first word is tagged,
# prefixed with the callee name and a non-breakable space.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the tagged first word followed by the remaining words
def ∀_magnet *args
  head, *rest = args.flatten
  label = head.unbowl.to_s
  label.prepend("#{__callee__}#{String::NBSP}")
  html_tag = ∃_magnet_tag(__callee__)
  css = ∃_magnet(__callee__)[:class]
  [tagify(html_tag, { :class => css }, label), rest]
end
55
+
56
# Default handler for `:regular` markup: tags all the words and harvests
# the result. Tag and class are looked up by the canonized callee, i.e.
# with the leading nesting markers stripped (see {#canonize}).
# @param [Array] args the words, gained since last call to {#harvest}
# @return the result of {#harvest}
def ∀_regular *args
  html_tag = ∃_regular_tag(canonize(__callee__))
  css = ∃_regular(canonize(__callee__))[:class]
  harvest __callee__, tagify(html_tag, { :class => css }, args)
end
66
+
67
+ ##############################################################################
68
+ ### Grip :: Specific handlers ###
69
+ ##############################################################################
70
# Handler for abbreviations: the first word is the term, the words up to
# the closing marker (callee name followed by `∎`) become its `title`.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the tagged term followed by the remainder after the marker
def † *args
  term, *words = args.flatten
  title, rest = words.join(SEPARATOR).split("#{__callee__}∎", 2)
  html_tag = ∃_grip_tag(__callee__)
  attrs = { :title => title, :class => ∃_grip(__callee__)[:class] }
  [tagify(html_tag, attrs, term), rest]
end
78
+
79
# Handler for anchors: the first word is the link target, the words up
# to the closing marker (callee name followed by `∎`) are the link text.
# Targets recognized as pictures by {#get_href_content} are rendered as
# an inplace `img` with the text as `alt`.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the produced element followed by the remainder after the marker
def ⚓ *args
  target, *words = args.flatten
  mine, rest = words.join(SEPARATOR).split("#{__callee__}∎", 2)
  target = target.unbowl
  element =
    if get_href_content(target) == :img
      standalone :img, { :src => target, :alt => [*mine].join(SEPARATOR), :class => 'inplace' }
    else
      tagify ∃_grip_tag(__callee__), { :href => target }, mine
    end
  [element, rest]
end
95
+
96
+ ##############################################################################
97
+ ### Alone :: Specific handlers ###
98
+ ##############################################################################
99
# `:alone` handler for the horizontal rule. Unlike the default handler
# it first harvests the words around as an orphan paragraph, then
# harvests the rule itself.
# @param [Array] args the words, gained since last call to {#harvest}
# @return the result of the last {#harvest} call
def —— *args
  unless args.vacant?
    paragraph = orphan(args.join(SEPARATOR))
    harvest nil, paragraph
  end
  harvest __callee__, standalone(∃_alone_tag(__callee__))
end
107
+
108
+ ##############################################################################
109
+ ### Block :: Specific handlers ###
110
+ ##############################################################################
111
+ # `:block` handler for comments (required because comments are
112
+ # formatted in HTML in some specific way.)
113
+ # The commented words are dropped: the handler returns an empty array.
114
+ # @param [Array] args the words, gained since last call to {#harvest}; ignored
115
+ # @return [Array] always an empty array
116
+ def ✍ *args
117
+ []
118
+ end
119
+
120
+ ##############################################################################
121
+ ### Magnet :: Specific handlers ###
122
+ ##############################################################################
123
+ # `:magnet` handler for reference to Livejournal user.
124
+ # The first word (after `unbowl`) is used as the user name in the profile and journal links.
125
+ # @param [Array] args the words, gained since last call to {#harvest}
126
+ # @return [Array] the user reference markup followed by the remaining words
127
+ def ✎ *args
128
+ param, *rest = args.flatten
129
+ param = param.unbowl
130
+ ljref = "<span style='white-space: nowrap;'><a href='http://#{param}.livejournal.com/profile?mode=full'><img src='http://l-stat.livejournal.com/img/userinfo.gif' alt='[info]' style='border: 0pt none ; vertical-align: bottom; padding-right: 1px;' height='17' width='17'></a><a href='http://#{param}.livejournal.com/?style=mine'><b>#{param}</b></a></span>"
131
+ [ljref, rest]
132
+ end
133
+
134
# `:magnet` handler producing a named element: the first word (after
# `unbowl`) becomes the `name` attribute, the content is a zero-width
# space.
# @param [Array] args the words, gained since last call to {#harvest}
# @return [Array] the named element followed by the remaining words
def ☇ *args
  anchor_name, *rest = args.flatten
  html_tag = ∃_magnet_tag(__callee__)
  [tagify(html_tag, { :name => anchor_name.unbowl }, String::ZERO_WIDTH_SPACE), rest]
end
138
+
139
+ ##############################################################################
140
+ ### Regular :: Specific handlers ###
141
+ ##############################################################################
142
+ # Handler for Youtube video
143
+ # @param [Array] args the words, gained since last call to {#harvest}
144
+ # @return [Nil] nil
145
+ def ✇ *args
146
+ id, *rest = args.flatten
147
+ harvest nil, orphan(rest.join(SEPARATOR)) unless rest.vacant?
148
+ harvest __callee__, %Q(
149
+ <iframe class='youtube' width='560' height='315' src='http://www.youtube.com/embed/#{id.unbowl}'
150
+ frameborder='0' allowfullscreen></iframe>
151
+ )
152
+ end
153
+
154
+ # Handler for standalone pictures with an optional caption.
155
+ # @todo Make it to understand quotes when there is a plain HTML on the other side
156
+ #
157
+ # @param [Array] args the picture href followed by the words of its caption
158
+ # @return [Nil] nil
159
+ def ⚘ *args
160
+ href, *title = args.flatten
161
+ harvest __callee__, %Q(
162
+ <figure>
163
+ <img src='#{href.unbowl}'/>
164
+ <figcaption>
165
+ <p>
166
+ #{title.join(SEPARATOR)}
167
+ </p>
168
+ </figcaption>
169
+ </figure>
170
+ )
171
+ end
172
+
173
# `:regular` handler for data lists (required since data list items
# consist of two tags: `dt` and `dd`.)
#
# The term and its definition are separated by the FIRST em dash
# surrounded by whitespace; further dashes belong to the definition.
# @param [Array] args the words, gained since last call to {#harvest}
# @return the result of {#harvest}
def ▶ *args
  # limit 2: without it everything after a second " — " was silently dropped
  dt, dd = args.join(SEPARATOR).split(/\s+—\s+/, 2)
  harvest __callee__, "\n#{tagify :dt, {}, dt}\n#{tagify :dd, {}, dd}\n"
end
184
+ # Alias for {#▶}, according to YAML rules specifies additional
185
+ # class for the data list `<dl>` tag behind (`dl-horizontal`.)
186
+ alias_method :▷, :▶
187
+
188
+ protected
189
# Computes the level of the `:linewide` element by counting
# preceding non-breakable spaces. For instance, nested lists
# are produced by appending `"\u{00A0}"` to the line item
# DSL tag:
#
#     li = "• li1 \u{00A0}• nested 1 \u{00A0}• nested 2 • li2"
#
# @param [Symbol|String] callee the DSL symbol to get the level information for.
# @return [Integer] the number of leading non-breakable spaces, or `-1`
#   when `callee` is `nil`, empty or consists of such spaces only.
def level callee
  callee.to_s.each_char.find_index { |char| char != String::NBSP } || -1
end
203
+
204
# Strips the leading non-breakable spaces (the nesting markers counted
# by {#level}) from the DSL symbol.
# @param [Symbol|String|nil] callee the DSL symbol, possibly nested
# @return [Symbol|nil] the symbol without the leading markers; `nil` for `nil`
def canonize callee
  # \A + sub: only the leading run is stripped (`^` + gsub also touched
  # the beginning of every subsequent line)
  callee.to_s.sub(/\A#{String::NBSP}*/, '').to_sym if callee
end
207
+
208
+ # @see Qipowl::Bowler#harvest
209
+ #
210
+ # Additionally it checks if there was a `:linewide` item, requiring
211
+ # surrounding html element (like `<ul>` around several `<li>`s.)
212
+ #
213
+ # @param [Symbol] callee of method
214
+ # @param [String] str to be harvested
215
+ def harvest callee, str
216
+ if callee.nil? || callee != @callee
217
+ level(callee).downto(level(@callee) + 1) { |i|
218
+ str += i.␚ify
219
+ } unless ∃_enclosures(canonize(callee)).nil?
220
+
221
+ if prev = ∃_enclosures(canonize(@callee))
222
+ level(@callee).downto(level(callee) + 1) { |i|
223
+ @yielded.last.sub!(/\A/, opening(prev[:tag], {:class => prev[:class]}))
224
+ @yielded.each { |s| s.gsub!(/#{i.␚ify}/) { closing(prev[:tag]) } }
225
+ }
226
+ end
227
+
228
+ @callee = callee
229
+ end
230
+ super callee, str
231
+ end
232
+
233
+ private
234
# Handles calls to nested DSL symbols, i.e. regular tags prefixed with
# non-breakable spaces (“ •” is a level 2 line-item). Those cannot be
# declared in advance, so the prefixed name is aliased to the canonical
# handler on the first call and then invoked.
#
# @param [Symbol] method as specified by caller (`method_missing`.)
# @param [Array] args as specified by caller (`method_missing`.)
# @param [Proc] block as specified by caller (`method_missing`.)
#
# @return [Array] the flattened `[method, *args]` when `method` is not a
#   nested regular tag; otherwise whatever the aliased handler returns
def special_handler method, *args, &block
  canonical = canonize(method)
  nested = level(method) > 0 && self.class::REGULAR_TAGS.key?(canonical)
  return [method, args].flatten unless nested

  # alias_method through send instead of evaluating a string of code:
  # no code is built from the method name
  self.class.send(:alias_method, method, canonical)
  # NOTE(review): args go as ONE array and the block as a positional
  # argument (not `*args, &block`) — kept as is, confirm it is intended.
  send method, args, block
end
250
+
251
+ # Produces html paragraph tag (`<p>`) with class `owl`.
252
+ # @see Qipowl::Bowler#orphan
253
+ # @param str the words, to be put in paragraph tag.
254
+ # @return [String] tagged words.
255
+ def orphan str
256
+ "#{tagify(:p, {}, str.to_s.strip)}"
257
+ end
258
# Constructs opening html tag for the input given.
#
# To construct `abbr` tag with `title` _Title_ and class _default_:
#
#     opening :abbr, { :title=>'Title', :class=>'default' }
#
# Attributes with `nil` values are skipped. The remaining ones are
# written in reverse order of `params` (the historical output format).
# Single quotes inside values are escaped, so a value cannot terminate
# its own attribute.
#
# @param [String] tag to produce opening tag string from.
# @param [Hash] params to be put into opening tag as attributes.
# @return [String] opening tag for the input given.
def opening tag, params={}
  attrs = params.reject { |_, value| value.nil? }.map { |name, value|
    " #{name}='#{value.to_s.gsub("'", '&#39;')}'"
  }.reverse.join
  "<#{tag}#{attrs}>"
end

# Constructs closing html tag for the input given.
#
# @param [String] tag to produce closing tag string from.
# @return [String] closing tag for the input given.
def closing tag
  "</#{tag}>"
end

# Acts most like an {#opening} method, but closes an element inplace
# (used for `hr`, `br`, `img`).
#
# @param [String] tag to produce the self-closed tag string from.
# @param [Hash] params to be put into the tag as attributes.
# @return [String] self-closed tag for the input given.
def standalone tag, params={}
  # only the final `>` closes the tag; an earlier one may belong to an
  # attribute value (e.g. alt='a > b')
  "#{opening(tag, params).chomp('>')}/>"
end
286
# Constructs valid tag for the input given, concatenating
# opening and closing tags around the text passed in `args`.
#
# @param [String] tag to produce html tag string from.
# @param [Hash] params to be put into opening tag as attributes.
# @param [Array] args the words, to be tagged around.
# @return [String] the words joined and wrapped into the tag, or an
#   empty string when the joined text is `vacant?`.
def tagify tag, params, *args
  text = args.join(SEPARATOR)
  return '' if text.vacant?

  "#{opening(tag, params)}#{text}#{closing(tag)}"
end
297
+
298
+
299
# Determines content of remote link by href.
#
# The decision is made on the href text only: links ending with a known
# picture extension (in any letter case) and links to `//i.chzbgr` are
# considered pictures, everything else is text.
# TODO Make image patterns configurable.
# @param [String] href link to remote resource
# @return [Symbol] content type (`:img` or `:text` currently)
def get_href_content href
  href = href.to_s.unbowl.strip
  # downcase: mixed-case extensions such as `.Jpg` used to fall through to :text
  picture = href.downcase.end_with?(*%w{png jpg jpeg gif}) || /\/\/i\.chzbgr/ =~ href

  # Disabled detection by the real content type of the remote resource:
  #
  # uri = URI(href.to_s.unbowl)
  # Net::HTTP.start(uri.host, uri.port) do |http|
  #   http.open_timeout = 1
  #   http.read_timeout = 1
  #
  #   request = Net::HTTP::Head.new uri
  #   response = http.request request
  #   case response.to_hash["content-type"].first
  #   when /image/ then return :img
  #   when /text/ then return :text
  #   end
  # end
  # :unknown
  # rescue
  #   logger.warn "Unable to determine link [#{href.to_s.unbowl}] type: no internet connection. Reverting to default."
  #   :unknown

  picture ? :img : :text
end
330
+
331
+ end
332
+ end
333
+ end
334
+ =begin
335
+ # Markup processor for Html output.
336
+ #
337
+ # This class produces HTML from markup as Markdown does.
338
+
339
+ # Amount of unnamed instances of the class (needed for new class name generation)
340
+ @@inst_count = 0
341
+
342
+
343
+ # `:handshake` default handler
344
+ # @param [String] from packed as string operand “before”
345
+ # @param [String] till packed as string operand “after”
346
+ # @return
347
+ def ∈ *args
348
+ from, till, *rest = args.flatten
349
+ tag = @mapping.handshake(__callee__)
350
+ tag = tag[:tag] if Hash === tag
351
+ [tagify(tag, {}, "#{from.unbowl}#{__callee__}#{till.unbowl}".gsub(String::SYMBOL_FOR_SPACE, ' ')), rest]
352
+ end
353
+ alias_method :⊂, :∈
354
+
355
+
356
+
357
+ # @see {Qipowl::Bowler#defreeze}
358
+ #
359
+ # Additionally it checks if tag is a `:block` tag and
360
+ # substitutes all the carriage returns (`$/`) with special symbol
361
+ # {String::CARRIAGE_RETURN} to prevent format damage.
362
+ #
363
+ # @param [String] str to be defreezed
364
+ def defreeze str
365
+ str = super str
366
+ @mapping[:block].each { |tag, htmltag|
367
+ str.gsub!(/(#{tag})(.*?)$(.*?)(#{tag}|\Z)/m) { |m|
368
+ "#{$1}('#{$2}', '#{$3}')\n\n"
369
+ }
370
+ }
371
+ str
372
+ end
373
+
374
+ # @see {Qipowl::Bowler#serveup}
375
+ #
376
+ # Additionally it beautifies the output HTML
377
+ #
378
+ # @param [String] str to be roasted
379
+ def serveup str
380
+ result = ''
381
+ %w(. , : ; ! ? »).map(&:bowl).each { |punct|
382
+ str.gsub!(/(?:\p{Space}|#{String::CARRIAGE_RETURN})*(#{punct})/, '\1')
383
+ # str.gsub!(/(#{punct})(?=\p{Alnum})/, '\1 ')
384
+ }
385
+ %w(«).map(&:bowl).each { |punct|
386
+ str.gsub!(/(#{punct})(?:\p{Space}|#{String::CARRIAGE_RETURN})*/, '\1')
387
+ str.gsub!(/(?<=\p{Alnum})(#{punct})/, ' \1')
388
+ }
389
+ served = super(str)
390
+ begin
391
+ HtmlBeautifier::Beautifier.new(result).scan(served)
392
+ rescue
393
+ logger.error "Was unable to tidyfy resulting HTML. Returning as is."
394
+ result = served
395
+ end
396
+ result
397
+ end
398
+
399
+ end
400
+
401
+ if __FILE__ == $0
402
+
403
+ i = 0
404
+ Dir.glob("#{File.dirname(__FILE__)}/../../../data/octopress-site/source/_posts/**/*.owl").each {|f|
405
+ puts "Processing ##{i += 1}: #{f}"
406
+ Qipowl::Html.parse File.read(f)
407
+ }
408
+ end
409
+ =end
@@ -0,0 +1,268 @@
1
+ # encoding: utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'fileutils'
5
+ require 'yaml'
6
+ require_relative '../core/monkeypatches.rb'
7
+ require_relative '../utils/hash_recursive_merge.rb'
8
+
9
+ module Qipowl
10
+
11
# SAX handler translating an HTML document into Qipowl markup.
#
# The produced markup is accumulated in {#qp}. Every `class` attribute
# met on `p`, `div`, `span` or `sup` registers a custom `✿_…` marker
# in {#tags}.
class HtmlDoc < Nokogiri::XML::SAX::Document
  # Elements whose text is buffered until the closing tag is met, since
  # their markup is emitted as a whole by {#end_element}.
  BUFFERED = [:a, :dfn, :abbr, :cite].freeze

  attr_reader :qp, :tags

  # @param mapping kept in `@mapping`; not consulted anywhere in this class
  def initialize mapping
    @mapping = mapping
    @inside = nil      # the buffered element or list-like container being processed
    @collector = {}    # values remembered between the start and the end of an element
    @tags = {:inplace => {}, :linewide => {}}
    @qp = ''
    @level = 0         # not used anywhere in this class
  end

  # SAX callback for an opening tag: appends the markup starting the
  # element to {#qp}.
  # @param [String] name the element name
  # @param [Array] attributes the `[name, value]` pairs of the element
  # @raise [RuntimeError] for elements having no translation
  def start_element name, attributes = []
    tag = name.to_sym
    attrs = Hash[attributes]

    @qp << case tag
           when :p, :div
             attrs['class'] ? "\n\n#{register(tag, tag, :linewide, attrs['class'])} " : "\n\n"
           when :ul, :ol, :table, :dl
             @inside = tag
             "\n"
           when :pre then "\n\nΛ\n"
           when :tr then " ┇ "
           when :td then " ┆ "
           when :a
             @inside = :a
             @collector[:href] = attrs['href']
             @collector[:name] = attrs['name']
             ''
           when :li then @inside == :ol ? "◦ " : "• "
           when :b, :strong then "≡"
           when :i, :em, :nobr then "≈"
           when :strike, :del, :s then "─"
           when :small then "↓"
           when :u then "▁"
           when :code, :tt then "λ"
           when :dfn, :abbr, :cite
             @inside = tag
             @collector[:title] = attrs['title']
             # nothing is emitted yet: the title is written by #end_element.
             # (The assignment itself used to be the branch value, i.e. the
             # title — or nil, which raised TypeError — leaked into the output.)
             ''
           when :hr then "\n\n——\n\n"
           when :br then " ⏎\n"
           when :center then "\n— "
           when :dt then "▷ "
           when :dd then " — "
           when :h1, :h2, :h3, :h4, :h5, :h6 then "§#{tag.to_s[1]} "
           when :blockquote then "\n\n〉 "
           when :figure
             @inside = :figure
             "\n\n"
           when :figcaption then " "
           when :img then fix_href(attrs['src'])
           when :span, :sup
             attrs['class'] ? " #{register(tag, :span, :inplace, attrs['class'])}" : ''
           when :embed, :iframe then "\n\n#{attrs['src']}\n\n"
           when :html, :body, :object, :param, :thead, :tbody, :font, :'lj-embed', :'lj-cut'
             ''
           else
             raise "=== Unhandled: #{name} with attrs: [#{attrs}]"
           end
  end

  # SAX callback for text. The parser may deliver the text of one element
  # in several chunks, hence buffered text is accumulated, not replaced.
  # @param [String] str the chunk of text
  def characters str
    if BUFFERED.include?(@inside)
      (@collector[:text] ||= '') << str
    else
      @qp << str
    end
  end

  # SAX callback for a closing tag: appends the markup finishing the
  # element to {#qp}; unknown elements add nothing.
  # @param [String] name the element name
  def end_element name
    tag = name.to_sym

    @qp << case tag
           when :p, :div
             @collector.delete(tag)
             "\n\n"
           when :a
             @inside = nil
             href = @collector.delete(:href)
             if href
               " #{(@collector.delete(:text) || '').gsub(/\s+/, "\u{00A0}")}¹#{fix_href href} "
             else
               "☇ #{@collector.delete(:name)} #{@collector.delete(:text)}"
             end
           when :dfn, :abbr, :cite
             @inside = nil
             text = @collector.delete(:text)
             title = @collector.delete(:title)
             text ? " #{text.gsub(/\s+/, "\u{00A0}")}†#{title}† " : ''
           when :ul, :ol, :table, :dl
             @inside = nil
             "\n"
           when :li then "\n"
           when :pre then "\nΛ\n\n"
           when :b, :strong then "≡"
           when :i, :em, :nobr then "≈"
           when :u then "▁"
           when :dd then "\n"
           when :strike, :del, :s then "─"
           when :small then "↓"
           when :code, :tt then "λ"
           when :span, :sup
             "#{@collector.delete(tag)} "
           when :h1, :h2, :h3, :h4, :h5, :h6 then "\n\n"
           when :blockquote then "\n\n"
           when :figure
             @inside = nil
             "\n\n"
           else
             ''
           end
  end

  private

  # Registers the custom marker for an element carrying a `class`.
  # @param [Symbol] slot the key in `@collector` to remember the marker under
  # @param [Symbol] tag the tag name used to build the marker and the rule
  # @param [Symbol] kind the section of {#tags} (`:linewide` or `:inplace`)
  # @param [String] css the value of the `class` attribute
  # @return [Symbol] the marker
  def register slot, tag, kind, css
    marker = "✿_#{tag}_#{css.gsub(/\s+/, '_')}".to_sym
    @collector[slot] = marker
    @tags[kind][marker] = "#{tag}†#{css.gsub(/\s+/, '†')}".to_sym
    marker
  end

  # Makes a site-relative link absolute.
  # @param [String, nil] href the link as met in the document
  # @param [String] site the prefix for relative links
  # @return [String] `href` itself when it starts with `http`, the prefixed
  #   link otherwise; an empty string for a missing (`nil`) link
  def fix_href href, site = 'http://mudasobwa.ru/'
    return '' if href.nil?

    href.start_with?('http') ? href : "#{site}#{href.sub(/\A\/+/, '')}"
  end
end
141
+
142
+ end
143
+
144
+ if __FILE__ == $0
145
+
146
# Normalizes legacy post HTML before it is fed to the SAX parser.
#
# The substitutions are applied in the order listed, each one to the
# result of the previous one.
# @param [String, nil] str the raw HTML of the post; `nil` (a missing
#   field) is treated as an empty string
# @return [String] the normalized HTML
def prepare str
  [
    [/&[nm]dash;/, '—'],                                    # dashes
    [/&nbsp;/, ' '],                                        # non-breakable spaces
    [/\s+--\s+/, ' — '],                                    # double hyphens used as dashes
    [/^\s*/, ''],                                           # leading spaces
    [/<img src="\/i\/>/, ''],                               # pictures with a broken src
    [/&trade;/, '™'],                                       # other entities
    [/&copy;/, '©'],                                        # other entities
    [/(?:1st@1stone\.ru|am@secondiary\.ru)/, 'am@mudasobwa.ru'], # obsolete e-mails (dots are literal)
    # NOTE(review): `matiouchkine.net` is followed by `\.ru` here, so only
    # `matiouchkine.net.ru` matches — confirm that is the intended host.
    [/http:\/\/(www\.)?(secondiary|1stone|matiouchkine.net)\.ru/, 'http://mudasobwa.ru'], # obsolete site name
    [/\[(http[^\]]*)\]/, '\1'],                             # bracketed links
    # NOTE(review): the pattern has no group, `\1` is always empty here
    [/<span>\s*<\/span>/, 'λ\1λ'],                          # empty spans
    [/<lj (?:comm|user)="(.*?)">/, '✎ \1'],                 # livejournal user references
    [/<([^<>]*?@[^<>]*?)>/, '\1'],                          # e-mails in angle brackets
    [/<imgsrc=/, '<img src='],                              # glued tag and attribute
    [/<ahref=/, '<a href='],                                # glued tag and attribute
    [/<\/p>\s*<p>\s*—/, " ⏎\n—"],                           # direct speech
    [/<br(?:\s*\/?\s*)>\s*<br(?:\s*\/?\s*)>/, "\n\n"],      # old-fashioned carriage
    [/<[!]--[^<>]*?-->/, '']                                # comments
    # [/([\.,:;!?])(?=\S)/, '\1 ']                          # fix punctuation (disabled)
  ].inject(str.to_s) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
167
+
168
# Tidies the markup produced by the SAX handler: runs of two or more
# line breaks are squeezed to exactly one empty line, and the leading and
# trailing whitespace and `⏎` marks are removed.
# @param [String] str the markup to tidy
# @return [String] the tidied copy
def postpare str
  # the edge patterns are anchored, so a single `sub` with a character
  # class does the job of `gsub` over a capturing alternation
  str.gsub(/\R{2,}/, "\n\n")
     .sub(/\A[\s⏎]+/, '')
     .sub(/[\s⏎]+\z/, '')
end
173
+
174
# Markup rules known in advance; the rules collected by the SAX handler
# while parsing the posts are merged into this hash later on.
tags = {
  :magnet => {:✎ => :lj, :☇ => :a},
  :inplace => {:▁ => :u, :─ => :del},
  :linewide => {:☛ => :twit},
  :block => {:✁ => :cut}
}
file = "#{File.dirname(__FILE__)}/../../../data/internals/posts.csv"
file_errors = "#{File.dirname(__FILE__)}/../../../data/internals/errors.txt"
FileUtils.rm file_errors if File.exist? file_errors

# mkdir_p instead of mkdir: the latter raises Errno::EEXIST as soon as
# the script is run for the second time
FileUtils.mkdir_p("#{File.dirname(__FILE__)}/../../../data/site")
185
+ # %w{txt pic ref twt}.each {|d| FileUtils.mkdir("#{File.dirname(__FILE__)}/../../../data/site/#{d}")}
186
+
187
+ puts "Reading #{file} …"
188
+ File.readlines(file).each { |l|
189
+ data = l.split('☢')
190
+ puts "Processing record #{data[0]}"
191
+ begin
192
+ html_doc = Qipowl::HtmlDoc.new nil
193
+ parser = Nokogiri::HTML::SAX::Parser.new(html_doc)
194
+ parser.parse(prepare data[2])
195
+ tags.rmerge! html_doc.tags
196
+ body = postpare(html_doc.qp)
197
+
198
+ body = body.strip if body
199
+
200
+ id = data[0]
201
+ title = data[1].gsub(/'/, "’")
202
+ date = data[3]
203
+ img = data[4]
204
+
205
+ if img && !img.empty? && !img.start_with?('http://')
206
+ img = "http://mudasobwa.ru/i/#{img.gsub(/\A\/+/, '')}"
207
+ end
208
+
209
+ q_doc = Qipowl::HtmlDoc.new nil
210
+ q_parser = Nokogiri::HTML::SAX::Parser.new(q_doc)
211
+ q_parser.parse(prepare data[5])
212
+ tags.rmerge! q_doc.tags
213
+ quote = postpare(q_doc.qp)
214
+
215
+ q_url = data[6]
216
+ type = data[7].to_i # 1 => text, 2 => image, 3 => quote, 4 => twit
217
+ stype = case type
218
+ when 1 then :txt
219
+ when 2 then :pic
220
+ when 3 then :ref
221
+ when 4 then :twt
222
+ else :txt
223
+ end
224
+
225
+ owl_text = %Q(---
226
+ title: '#{title}'
227
+ id: #{id}
228
+ date: '#{date}'
229
+ categories: [#{stype}]
230
+ ---
231
+
232
+ )
233
+ # owl_text << (type == 4 ? "☛ " : "§1 ")
234
+ # owl_text << title
235
+ # owl_text << "\n\n"
236
+
237
+ owl_text << case type
238
+ when 2
239
+ "#{img} #{body.gsub(/\R/, ' ⏎ ')}"
240
+ when 3
241
+ q_ref = q_url[/http:\/\/(.*?)\/|\Z/, 1].split('.').last(2).join('.') rescue nil
242
+ "\n〉 #{quote.strip}\n‒ #{q_ref ? q_ref : q_url}, #{q_url}\n\n#{body}"
243
+ else body
244
+ end
245
+
246
+ fname = "#{date.split.first}-#{title.to_filename}.owl"
247
+ fname = (1..100).each {|i|
248
+ break "#{date.split.first}-#{title.to_filename}-#{i}.owl" \
249
+ unless File.exist?("#{File.dirname(__FILE__)}/../../../data/site/#{date.split.first}-#{title.to_filename}-#{i}.owl")
250
+ } if File.exist?("#{File.dirname(__FILE__)}/../../../data/site/#{fname}")
251
+ File.open("#{File.dirname(__FILE__)}/../../../data/site/#{fname}", 'a') { |f| f.write(owl_text) }
252
+
253
+ rescue Exception => e
254
+ puts '—'*40
255
+ puts 'Error occured'
256
+ puts prepare(data[2])
257
+ puts '—'*40
258
+ puts prepare(data[5])
259
+ puts '—'*40
260
+ raise e
261
+ end
262
+ }
263
+
264
+ File.open("#{File.dirname(__FILE__)}/../../../data/site/rules.yaml", 'a') { |f|
265
+ f.write(tags.to_yaml)
266
+ }
267
+
268
+ end