autolinker 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fc2ad1275bb574e26d7e33b79e53568238569e55
4
+ data.tar.gz: 852a3bd4b302ce7345cc308ee383b327b0d57583
5
+ SHA512:
6
+ metadata.gz: 36dd7e68793861d42dad25a2fefd90a981d35fd10f4aa8cc84968be88650d74ccfea13ddfc1242a9eba414d0e211e3c2f95f9f22a567323a6b1673a3844aeab1
7
+ data.tar.gz: 34efc38f4cf30d88919e6f40448cc36a4620e510928f933c06f7ba8e0063f6f9f8a3c71629b4d5e3e671052cdf1a291ada4e94d020396690a59b5b153e91682f
@@ -0,0 +1,31 @@
1
+ # autolinker
2
+
3
+ autolinker is a standalone version of [rails_autolink](https://github.com/tenderlove/rails_autolink) without the dependencies
4
+ on Rails.
5
+
6
+ I want to mention [Rinku](https://github.com/vmg/rinku) which is an awesome gem for MRI. However, since Rinku is not working
7
+ under JRuby, I had no choice but to create my own version.
8
+
9
+ ## Warning
10
+
11
+ This code is not production ready yet. I took only the parts of rails_autolink that are strictly needed. The code still needs
12
+ to be cleaned up and tested more thoroughly.
13
+
14
+ ## Contributing to autolinker
15
+
16
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
17
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
18
+ * Fork the project.
19
+ * Start a feature/bugfix branch.
20
+ * Commit and push until you are happy with your contribution.
21
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
22
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
23
+
24
+ ## Credits
25
+
26
+ This work is a partial port of [rails_autolink](https://github.com/tenderlove/rails_autolink).
27
+
28
+ ## Copyright
29
+
30
+ Copyright (c) 2013 Helmut Juskewycz. See LICENSE for further details.
31
+
@@ -0,0 +1,45 @@
1
# encoding: utf-8

# Rakefile for the autolinker gem: sets up Bundler, then defines the Jeweler
# packaging/release tasks, the test task and the RDoc task.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the Bundler problem and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "autolinker"
  gem.homepage = "http://github.com/hjuskewycz/autolinker"
  gem.license = "MIT"
  gem.summary = "Extract of the rails_autolink gem for standalone usage."
  gem.description = %Q{The rails_autolink gem provides an easy way to generate links in texts. However, the
    gem is tightly coupled to Rails which isn't always needed. This gem extracts the autolink
    behavior of rails_autolink, so it can be used standalone.}
  gem.email = "helmut@juskewycz.com"
  gem.authors = ["Helmut Juskewycz"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

require 'rdoc/task'
# RDoc::Task is the class defined by 'rdoc/task'; Rake::RDocTask is only a
# backwards-compatibility alias for it.
RDoc::Task.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "autolinker #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,30 @@
1
# encoding: UTF-8

# Gem specification for autolinker: package metadata plus an explicit file
# manifest.
Gem::Specification.new do |s|
  s.name = 'autolinker'
  s.version = '0.1'
  s.summary = 'Mostly autolinking'
  s.description = <<-EOF
    An autolinking library that acts as a standalone replacement for Rails `auto_link`
  EOF
  # Full address including the TLD, matching the one used in the Rakefile.
  s.email = 'helmut@juskewycz.com'
  s.homepage = 'http://github.com/hjuskewycz/autolinker'
  s.authors = ['Helmut Juskewycz']
  s.license = 'MIT'
  # = MANIFEST =
  s.files = %w[
    README.md
    Rakefile
    lib/autolinker.rb
    lib/autolinker/text_helper.rb
    lib/autolinker/html/node.rb
    lib/autolinker/html/sanitizer.rb
    lib/autolinker/html/tokenizer.rb
    autolinker.gemspec
    test/helper.rb
    test/test_autolinker.rb
  ]
  # = MANIFEST =
  s.test_files = ['test/test_autolinker.rb']
  s.require_paths = ['lib']
end
@@ -0,0 +1,9 @@
1
+ require 'erb'
2
+ require 'autolinker/html/node'
3
+ require 'autolinker/html/sanitizer'
4
+ require 'autolinker/html/tokenizer'
5
+ require 'autolinker/text_helper'
6
+
7
+ module Autolinker
8
+ # Top-level namespace of the gem. It is intentionally empty here; the files required above reopen it.
9
+ end
@@ -0,0 +1,536 @@
1
+ require 'strscan'
2
+
3
module Autolinker
  module HTML #:nodoc:

    # A normalized set of search conditions. Whatever is passed in is turned
    # into a hash with symbol keys; nested conditions (<tt>:parent</tt>,
    # <tt>:child</tt>, ...) are recursively converted into Conditions as well,
    # and <tt>:attributes</tt> keys are converted to strings. A non-hash value
    # is treated as a <tt>:content</tt> condition. Unknown keys raise a
    # RuntimeError.
    class Conditions < Hash #:nodoc:
      def initialize(hash)
        super()
        hash = { :content => hash } unless Hash === hash
        keys_to_symbols(hash).each do |key, value|
          self[key] =
            case key
            when :tag, :content
              # keys are valid, and require no further processing
              value
            when :attributes
              keys_to_strings(value)
            when :parent, :child, :ancestor, :descendant, :sibling, :before,
                 :after
              Conditions.new(value)
            when :children
              normalize_children(value)
            else
              raise "illegal key #{key.inspect} => #{value.inspect}"
            end
        end
      end

      private

      # Normalizes the option hash of a <tt>:children</tt> condition.
      def normalize_children(options)
        options = keys_to_symbols(options)
        options.keys.each do |key|
          value = options[key]
          case key
          when :count, :greater_than, :less_than
            # keys are valid, and require no further processing
          when :only
            options[key] = Conditions.new(value)
          else
            raise "illegal key #{key.inspect} => #{value.inspect}"
          end
        end
        options
      end

      # Returns a copy of +hash+ whose keys are strings.
      def keys_to_strings(hash)
        Hash[hash.keys.map { |k| [k.to_s, hash[k]] }]
      end

      # Returns a copy of +hash+ whose keys are symbols. Raises if a key
      # cannot be converted.
      def keys_to_symbols(hash)
        Hash[hash.keys.map do |k|
          raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
          [k.to_sym, hash[k]]
        end]
      end
    end

    # The base class of all nodes, textual and otherwise, in an HTML document.
    class Node #:nodoc:
      # The array of children of this node. Not all nodes have children.
      attr_reader :children

      # The parent node of this node. All nodes have a parent, except for the
      # root node.
      attr_reader :parent

      # The line number of the input where this node was begun
      attr_reader :line

      # The byte position in the input where this node was begun
      attr_reader :position

      # Create a new node as a child of the given parent.
      def initialize(parent, line=0, pos=0)
        @parent = parent
        @children = []
        @line, @position = line, pos
      end

      # Return a textual representation of the node.
      def to_s
        @children.join()
      end

      # Return false (subclasses must override this to provide specific matching
      # behavior.) +conditions+ may be of any type.
      def match(conditions)
        false
      end

      # Search the children of this node for the first node for which #find
      # returns non +nil+. Returns the result of the #find call that succeeded.
      def find(conditions)
        conditions = validate_conditions(conditions)
        @children.each do |child|
          node = child.find(conditions)
          return node if node
        end
        nil
      end

      # Search for all nodes that match the given conditions, and return them
      # as an array. The node itself is included when it matches.
      def find_all(conditions)
        conditions = validate_conditions(conditions)

        matches = []
        matches << self if match(conditions)
        @children.each do |child|
          matches.concat child.find_all(conditions)
        end
        matches
      end

      # Returns +false+. Subclasses may override this if they define a kind of
      # tag.
      def tag?
        false
      end

      # Returns +conditions+ unchanged when it already is a Conditions
      # instance, otherwise wraps it in one.
      def validate_conditions(conditions)
        Conditions === conditions ? conditions : Conditions.new(conditions)
      end

      # Structural equality: same class and pairwise equal children.
      def ==(node)
        return false unless self.class == node.class && children.size == node.children.size

        children.zip(node.children).all? { |mine, theirs| mine == theirs }
      end

      class <<self
        # Builds a Text, CDATA or Tag node from a single token +content+.
        # In +strict+ mode malformed markup raises a RuntimeError; otherwise
        # the parser recovers as well as it can.
        def parse(parent, line, pos, content, strict=true)
          return Text.new(parent, line, pos, content) if content !~ /^<\S/

          scanner = StringScanner.new(content)

          unless scanner.skip(/</)
            raise "expected <" if strict
            return Text.new(parent, line, pos, content)
          end

          if scanner.skip(/!\[CDATA\[/)
            unless scanner.skip_until(/\]\]>/)
              raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" if strict
              scanner.skip_until(/\Z/)
            end

            # Only the opening marker is stripped; any further "<![CDATA["
            # inside the section is ordinary content and must be kept.
            return CDATA.new(parent, line, pos, scanner.pre_match.sub(/\A<!\[CDATA\[/, ''))
          end

          closing = (scanner.scan(/\//) ? :close : nil)
          name = scanner.scan(/[^\s!>\/]+/)
          return Text.new(parent, line, pos, content) unless name
          name.downcase!

          # Closing tags carry no attributes; +attributes+ stays nil for them.
          attributes = nil
          unless closing
            scanner.skip(/\s*/)
            attributes = parse_attributes(scanner)
            closing = (scanner.scan(/\//) ? :self : nil)
          end

          unless scanner.scan(/\s*>/)
            if strict
              raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
            else
              # throw away all text until we find what we're looking for
              scanner.skip_until(/>/) or scanner.terminate
            end
          end

          Tag.new(parent, line, pos, name, attributes, closing)
        end

        private

        # Scans the attribute list of an opening tag. Returns a hash mapping
        # downcased attribute names to their value; an attribute without a
        # value maps to +true+.
        def parse_attributes(scanner)
          attributes = {}
          while attr = scanner.scan(/[-\w:]+/)
            value = true
            if scanner.scan(/\s*=\s*/)
              if delim = scanner.scan(/['"]/)
                value = parse_quoted_value(scanner, delim)
              else
                value = scanner.scan(/[^\s>\/]+/)
              end
            end
            attributes[attr.downcase] = value
            scanner.skip(/\s*/)
          end
          attributes
        end

        # Scans a quoted attribute value up to the closing +delim+. A
        # backslash and the character following it are kept verbatim.
        def parse_quoted_value(scanner, delim)
          value = ""
          while text = scanner.scan(/[^#{delim}\\]+|./)
            case text
            when "\\"
              value << text
              break if scanner.eos?
              value << scanner.getch
            when delim
              break
            else
              value << text
            end
          end
          value
        end
      end
    end

    # A node that represents text, rather than markup.
    class Text < Node #:nodoc:

      attr_reader :content

      # Creates a new text node as a child of the given parent, with the given
      # content.
      def initialize(parent, line, pos, content)
        super(parent, line, pos)
        @content = content
      end

      # Returns the content of this node.
      def to_s
        @content
      end

      # Returns +self+ if this node meets the given conditions. Text nodes support
      # conditions of the following kinds:
      #
      # * if +conditions+ is a string, it must be equal to the node's content
      # * if +conditions+ is a regular expression, it must match the node's
      #   content
      # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
      #   is either a string or a regexp, and which is interpreted as described
      #   above.
      def find(conditions)
        match(conditions) && self
      end

      # Returns a true value if this node meets the given conditions, or a
      # false value otherwise. See the discussion of #find for the valid
      # conditions.
      def match(conditions)
        case conditions
        when String
          @content == conditions
        when Regexp
          @content =~ conditions
        when Hash
          conditions = validate_conditions(conditions)

          # Text nodes only have :content, :parent, :ancestor
          unless (conditions.keys - [:content, :parent, :ancestor]).empty?
            return false
          end

          match(conditions[:content])
        else
          nil
        end
      end

      # Equal when the generic node comparison holds and the content is equal.
      def ==(node)
        return false unless super
        content == node.content
      end
    end

    # A CDATA node is simply a text node with a specialized way of displaying
    # itself.
    class CDATA < Text #:nodoc:
      def to_s
        "<![CDATA[#{super}]]>"
      end
    end

    # A Tag is any node that represents markup. It may be an opening tag, a
    # closing tag, or a self-closing tag. It has a name, and may have a hash of
    # attributes.
    class Tag < Node #:nodoc:

      # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
      attr_reader :closing

      # Either +nil+, or a hash of attributes for this node.
      attr_reader :attributes

      # The name of this tag.
      attr_reader :name

      # Create a new node as a child of the given parent, from an already
      # parsed tag name, attribute hash (or +nil+) and closing status.
      def initialize(parent, line, pos, name, attributes, closing)
        super(parent, line, pos)
        @name = name
        @attributes = attributes
        @closing = closing
      end

      # A convenience for obtaining an attribute of the node. Returns +nil+ if
      # the node has no attributes.
      def [](attr)
        @attributes ? @attributes[attr] : nil
      end

      # Returns a true value if this tag cannot contain child nodes: it is a
      # closing or self-closing tag, or its name is one of the HTML elements
      # listed below. When +xml+ is true, a tag without a closing marker is
      # never considered childless.
      def childless?(xml = false)
        return false if xml && @closing.nil?
        !@closing.nil? ||
          @name =~ /^(img|br|hr|link|meta|area|base|basefont|
                      col|frame|input|isindex|param)$/ox
      end

      # Returns a textual representation of the node
      def to_s
        if @closing == :close
          "</#{@name}>"
        else
          s = "<#{@name}"
          # A tag may have been built without attributes (nil); treat that
          # like an empty attribute list instead of failing.
          (@attributes || {}).each do |k, v|
            s << " #{k}"
            s << "=\"#{v}\"" if String === v
          end
          s << " /" if @closing == :self
          s << ">"
          @children.each { |child| s << child.to_s }
          s << "</#{@name}>" if @closing != :self && !@children.empty?
          s
        end
      end

      # If either the node or any of its children meet the given conditions, the
      # matching node is returned. Otherwise, +nil+ is returned. (See the
      # description of the valid conditions in the +match+ method.)
      def find(conditions)
        match(conditions) && self || super
      end

      # Returns +true+, indicating that this node represents an HTML tag.
      def tag?
        true
      end

      # Returns +true+ if the node meets all of the given conditions. The
      # +conditions+ parameter must be a hash of any of the following keys
      # (all are optional):
      #
      # * <tt>:tag</tt>: the node name must match the corresponding value
      # * <tt>:attributes</tt>: a hash. The node's values must match the
      #   corresponding values in the hash.
      # * <tt>:parent</tt>: a hash. The node's parent must match the
      #   corresponding hash.
      # * <tt>:child</tt>: a hash. At least one of the node's immediate children
      #   must meet the criteria described by the hash.
      # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
      #   meet the criteria described by the hash.
      # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
      #   must meet the criteria described by the hash.
      # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
      #   meet the criteria described by the hash.
      # * <tt>:after</tt>: a hash. The node must come after at least one
      #   sibling meeting the criteria described by the hash.
      # * <tt>:before</tt>: a hash. The node must come before at least one
      #   sibling meeting the criteria described by the hash.
      # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
      #   keys:
      # ** <tt>:count</tt>: either a number or a range which must equal (or
      #    include) the number of children that match.
      # ** <tt>:less_than</tt>: the number of matching children must be less than
      #    this number.
      # ** <tt>:greater_than</tt>: the number of matching children must be
      #    greater than this number.
      # ** <tt>:only</tt>: another hash consisting of the keys to use
      #    to match on the children, and only matching children will be
      #    counted.
      #
      # Conditions are matched using the following algorithm:
      #
      # * if the condition is a string, it must be equal to the value.
      # * if the condition is a regexp, it must match the value.
      # * if the condition is a number, the value must match number.to_s.
      # * if the condition is +true+, the value must not be +nil+.
      # * if the condition is +false+ or +nil+, the value must be +nil+.
      #
      # A node without a parent never satisfies <tt>:parent</tt>,
      # <tt>:sibling</tt>, <tt>:before</tt> or <tt>:after</tt>.
      #
      # Usage:
      #
      #   # test if the node is a "span" tag
      #   node.match :tag => "span"
      #
      #   # test if the node's parent is a "div"
      #   node.match :parent => { :tag => "div" }
      #
      #   # test if any of the node's ancestors are "table" tags
      #   node.match :ancestor => { :tag => "table" }
      #
      #   # test if any of the node's immediate children are "em" tags
      #   node.match :child => { :tag => "em" }
      #
      #   # test if any of the node's descendants are "strong" tags
      #   node.match :descendant => { :tag => "strong" }
      #
      #   # test if the node has between 2 and 4 span tags as immediate children
      #   node.match :children => { :count => 2..4, :only => { :tag => "span" } }
      #
      #   # get funky: test to see if the node is a "div", has a "ul" ancestor
      #   # and an "li" parent (with "class" = "enum"), and whether or not it has
      #   # a "span" descendant that contains # text matching /hello world/:
      #   node.match :tag => "div",
      #              :ancestor => { :tag => "ul" },
      #              :parent => { :tag => "li",
      #                           :attributes => { :class => "enum" } },
      #              :descendant => { :tag => "span",
      #                               :child => /hello world/ }
      def match(conditions)
        conditions = validate_conditions(conditions)

        # check content of child nodes
        if conditions[:content]
          if children.empty?
            return false unless match_condition("", conditions[:content])
          else
            return false unless children.find { |child| child.match(conditions[:content]) }
          end
        end

        # test the name
        if conditions[:tag]
          return false unless match_condition(@name, conditions[:tag])
        end

        # test attributes
        (conditions[:attributes] || {}).each do |key, value|
          return false unless match_condition(self[key], value)
        end

        # test parent (a node without a parent cannot satisfy this)
        if conditions[:parent]
          return false unless parent && parent.match(conditions[:parent])
        end

        # test children
        if conditions[:child]
          return false unless children.find { |child| child.match(conditions[:child]) }
        end

        # test ancestors
        if conditions[:ancestor]
          return false unless ancestor_matches?(conditions[:ancestor])
        end

        # test descendants
        if conditions[:descendant]
          found = children.find do |child|
            # test the child itself, then the child's own descendants
            child.match(conditions[:descendant]) ||
              child.match(:descendant => conditions[:descendant])
          end
          return false unless found
        end

        # count children
        if conditions[:children]
          return false unless children_count_matches?(conditions[:children])
        end

        # test siblings
        if conditions[:sibling] || conditions[:before] || conditions[:after]
          return false unless siblings_match?(conditions)
        end

        true
      end

      # Equal when the generic node comparison holds and closing status, name
      # and attributes are equal.
      def ==(node)
        return false unless super
        return false unless closing == node.closing && self.name == node.name
        attributes == node.attributes
      end

      private

      # Returns true if any ancestor of this node matches +conditions+.
      def ancestor_matches?(conditions)
        node = parent
        while node
          return true if node.match(conditions)
          node = node.parent
        end
        false
      end

      # Applies the <tt>:children</tt> counting options to the tag children
      # of this node (childless tags are only counted when self-closing).
      def children_count_matches?(opts)
        candidates = children.select do |c|
          c.kind_of?(Tag) && (c.closing == :self || !c.childless?)
        end
        candidates = candidates.select { |c| c.match(opts[:only]) } if opts[:only]

        opts.each do |key, value|
          case key
          when :only
            next
          when :count
            if Integer === value
              return false if candidates.length != value
            else
              return false unless value.include?(candidates.length)
            end
          when :less_than
            return false unless candidates.length < value
          when :greater_than
            return false unless candidates.length > value
          else
            raise "unknown count condition #{key}"
          end
        end
        true
      end

      # Checks the <tt>:sibling</tt>, <tt>:before</tt> and <tt>:after</tt>
      # conditions. This node is located among its siblings by object
      # identity, because #== is structural and would confuse it with an
      # identical-looking sibling.
      def siblings_match?(conditions)
        siblings = parent ? parent.children : []
        self_index = siblings.index { |s| s.equal?(self) }

        if conditions[:sibling]
          return false unless siblings.any? do |s|
            !s.equal?(self) && s.match(conditions[:sibling])
          end
        end

        if conditions[:before]
          following = self_index ? siblings[(self_index + 1)..-1] : []
          return false unless following.any? { |s| s.match(conditions[:before]) }
        end

        if conditions[:after]
          preceding = self_index ? siblings[0, self_index] : []
          return false unless preceding.any? { |s| s.match(conditions[:after]) }
        end

        true
      end

      # Match the given value to the given condition.
      def match_condition(value, condition)
        case condition
        when String
          value && value == condition
        when Regexp
          value && value.match(condition)
        when Numeric
          value == condition.to_s
        when true
          !value.nil?
        when false, nil
          value.nil?
        else
          false
        end
      end
    end
  end
end