tag-extractor 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/tag_extractor.rb +202 -19
  2. metadata +3 -3
@@ -1,39 +1,206 @@
1
+ # Public: TagExtractor module contains various classes to handle tag extraction and manipulation.
2
+ # The module relies on the notions of separator and container to tell tags apart from the
3
+ # rest of the string.
4
+ #
5
+ # Examples
6
+ #
7
+ # "#social, economy, #physics, #[web development]"
8
+ # # Here we have 3 tags : social, physics, web development.
9
+ # # '#' is the tag separator and [] are the containers,
10
+ # # needed only when tags are composed of multiple words.
11
+ #
12
+ # author - Gabriel Dehan (https://github.com/gabriel-dehan)
13
+ # documentation - https://github.com/gabriel-dehan/TagExtractor
14
+ # version - 1.0.0
15
+ #
16
+ # ______________
17
+ # < Tag Extractor >
18
+ # --------------
19
+ # \ ^__^
20
+ # \(00)\_______
21
+ # (__)\ RUBY )\/\
22
+ # ## ||----w |
23
+ # || ||
24
+ #
25
+ # A minimal ruby library for tag extraction and manipulation.
26
+
1
27
  module TagExtractor
2
- @@separator = nil
28
+ # Public: Constant to be passed to TagExtractor subclasses methods,
29
+ # allowing you to default to the Global Separator you have set through tag_separator=(separator)
30
+ GLOBAL_SEPARATOR = nil
3
31
 
4
- def self.tag_separator=(s)
5
- @@separator = s
6
- end
32
+ @@separator = GLOBAL_SEPARATOR
33
+
34
+ # Public : Constant set with a default separator, namely a sharp symbol (#)
35
+ DEFAULT_SEPARATOR = '#'
36
+
37
+ # Public : Constant set with the default container, namely square brackets ([])
38
+ @@default_container = DEFAULT_CONTAINER = '[]'
39
+
40
+ @@container = @@default_container
41
+
42
+ class << self
43
+ # Public: Sets the String tag separator.
44
+ def tag_separator=(s)
45
+ @@separator = s
46
+ end
47
+
48
+ # Public: Returns the String tag separator.
49
+ def tag_separator
50
+ @@separator || raise(TagSeparatorError)
51
+ end
52
+
53
+ # Public: Sets the String multi-words tag container.
54
+ def words_container=(c)
55
+ @@container = c
56
+ end
57
+ alias :multiwords_container= :words_container=
7
58
 
8
- def self.tag_separator
9
- @@separator || raise(TagSeparatorError)
59
+ # Public: Returns the String multi-words tag container.
60
+ def words_container
61
+ @@container || @@default_container
62
+ end
63
+ alias :multiwords_container :words_container
10
64
  end
11
65
 
66
+ # Public: TagExtractor::StringExtractor class, allows tag extraction from a String.
12
67
  class StringExtractor
68
+ # Public: Returns the original String.
13
69
  attr_reader :source
14
70
 
71
+ # Public: Initialize a StringExtractor.
72
+ #
73
+ # source - A String from which to extract the tags.
15
74
  def initialize(source)
16
75
  @source = source
17
76
  end
18
77
 
19
- def extract_with_separator(separator = nil)
20
- @source.scan(get_regex(separator))
78
+ # Public: Extract tags, along with their separators, from the source.
79
+ #
80
+ # separator - a separator to use for tag extraction.
81
+ # If none specified, it will default to the global separator.
82
+ # container - a container to use for tag extraction.
83
+ # If none specified, it will default to the default container.
84
+ # opts - A hash with options for the extraction (default: { multiword => true } ).
85
+ # :multiword - A boolean to indicate if multiple words tags are to be extracted.
86
+ #
87
+ # Returns an Array of tags with separators : ["#tag1", "#[long tag]", "#tag2"]
88
+ def extract_with_separator(separator = nil, container = nil, opts = { multiword: true })
89
+ @source.scan(get_regex(separator, container, opts[:multiword]))
21
90
  end
22
91
 
23
- def extract(separator = nil)
24
- extract_with_separator(separator).collect { |t| t.slice!(0); t }
92
+ # Public: Extract tags, removing their separators.
93
+ #
94
+ # separator - A String separator to use for tag extraction.
95
+ # If none specified, it will default to the global separator.
96
+ # container - A String container to use for tag extraction.
97
+ # If none specified, it will default to the default container.
98
+ # opts - A Hash with options for the extraction (default: { multiword => true } ).
99
+ # :multiword - A Boolean to indicate if multiple words tags are to be extracted.
100
+ #
101
+ # Returns an Array of tags without separators : ["tag1", "long tag", "tag2"]
102
+ def extract(separator = nil, container = nil, opts = { multiword: true })
103
+ tags = extract_with_separator(separator, container, opts)
104
+ remove_separators_in(tags, container: container)
25
105
  end
26
106
 
27
107
  private
28
- def get_regex(separator = nil)
108
+ # Private: provides the regexp used for scanning a tagful string.
109
+ #
110
+ # separator - The String separator used for tag extraction.
111
+ # container - The String container used for tag extraction.
112
+ # multiword - A Boolean to indicate if multiple words tags are to be extracted.
113
+ #
114
+ # Returns a Regexp.
115
+ def get_regex(separator, container, multiword)
116
+ # We get the default separator & containers if none were specified
29
117
  tag_separator = separator || TagExtractor::tag_separator
30
- %r{(?:#{tag_separator})[a-zA-Z](?:\w|-)*}
118
+ tag_container = container || TagExtractor::words_container
119
+
120
+ # Transforms the container string into an array like ['[', ']'].
121
+ left, right = container_array(tag_container)
122
+
123
+ # Word matching regex for simple and multiple words.
124
+ mono_word = '(?:[a-zA-Z](?:\w|-)*)'
125
+ multi_words = '(?:[a-zA-Z](?:\w|-|\s)*)'
126
+
127
+ # Escapes everything.
128
+ left, right, tag_separator = [left, right, tag_separator].map { |s| Regexp::escape(s) }
129
+
130
+ if multiword
131
+ %r(#{tag_separator}(?:#{mono_word}|(?:#{left}{1}#{multi_words}#{right}{1})))
132
+ else
133
+ %r(#{tag_separator}(?:#{mono_word}))
134
+ end
135
+ end
136
+
137
+ # Private: Remove tags separators and containers from a list of tags.
138
+ #
139
+ # tags - An Array of tags.
140
+ # opts - A Hash of options (default: { container => nil }).
141
+ # :container - A String to specify the container from which to extract multiple words tags.
142
+ # If none specified, it will default to the Default or Global words container.
143
+ #
144
+ # Returns an Array of cleaned tags.
145
+ def remove_separators_in tags, opts = { container: nil }
146
+ tag_container = opts[:container] || TagExtractor::words_container
147
+ tags.collect { |t| t.slice!(0); remove_tags_container(t, tag_container) }
148
+ end
149
+
150
+ # Private: Remove tags container from a tag.
151
+ #
152
+ # t - The tag, as a String.
153
+ # c - the container, as a String.
154
+ #
155
+ # Returns the cleaned tag.
156
+ def remove_tags_container(t, c)
157
+ l, r = container_array(c)
158
+ t.gsub!(l, '')
159
+ t.gsub!(r, '')
160
+ t
161
+ end
162
+
163
+ # Private: Transforms the container string into an array.
164
+ #
165
+ # c - the container's String.
166
+ #
167
+ # Examples
168
+ #
169
+ # container_array '[]' # => ['[',']']
170
+ #
171
+ # Returns an Array of two strings.
172
+ def container_array(c)
173
+ c = c || TagExtractor::words_container
174
+ c = c.split ''
31
175
  end
32
176
  end # StringExtractor
33
177
 
178
+ # Public: A class holding methods to handle tags extraction and manipulation from HTML Strings.
179
+ # Inherits from StringExtractor.
34
180
  class HTMLExtractor < StringExtractor
35
- def convert_tags_to_html_links(separator = nil, options = { class: nil }, &block)
36
- @source.gsub!(get_regex(separator)) { |name|
181
+ # Public: Add links around all tags in an HTML String.
182
+ #
183
+ # separator - A specific separator, as a String. If none specified, it defaults to the global separator.
184
+ # container - A specific container, as a String. If none specified, it defaults to the default or global container.
185
+ # options - A Hash of options for the link extraction (default: { class => nil }).
186
+ # :class - A String css class to add to the <a> link tag.
187
+ # :multiword - A Boolean to indicate if multiple words tags are to be extracted.
188
+ # block - A Block used to specify a link dynamically. It is passed the cleaned tag string and it should return a String to be injected in the href attribute.
189
+ #
190
+ # Examples
191
+ #
192
+ # # Considering the following string has been used for instantiation :
193
+ # # 'This is a string with #tag1, #tag2'
194
+ # html_extractor.convert_tags_to_html_links('#', :class => 'tag tag-link') do |tag_string|
195
+ # "/tag/#{tag_string.downcase}"
196
+ # end
197
+ # # => 'This is a string with <a class="tag tag-link" href="/tag/tag1">#tag1</a>, <a class="tag tag-link" href="/tag/tag2">#tag2</a>'
198
+ #
199
+ # Returns an HTML String.
200
+ def convert_tags_to_html_links(separator = nil, container = nil, options = { class: nil }, &block)
201
+ multi = options[:multiword] || true
202
+ @source.gsub!(get_regex(separator, container, multi)) { |name|
203
+ name = remove_tags_container(name, container)
37
204
  link = block.call(name.slice(1..-1)) || ''
38
205
  '<a ' + (options[:class].nil? ? '' : 'class="' + options[:class] + '" ') + 'href="' + link + '">' + name + '</a>'
39
206
  }
@@ -41,6 +208,7 @@ module TagExtractor
41
208
  alias :linkify_tags :convert_tags_to_html_links
42
209
  end
43
210
 
211
+ # Private : TagExtractor specific Error and Exceptions.
44
212
  class TagSeparatorError < StandardError
45
213
  def initialize
46
214
  super "Could not find any tag separator"
@@ -49,16 +217,31 @@ module TagExtractor
49
217
  end
50
218
 
51
219
  class String
52
- def extract_tags(separator = nil, with_separator = false)
220
+ # Public: Native String helper for TagExtractor::StringExtractor#extract and #extract_with_separator.
221
+ #
222
+ # separator - a separator to use for tag extraction.
223
+ # If none specified, it will default to the global separator.
224
+ # container - a container to use for tag extraction.
225
+ # If none specified, it will default to the default container.
226
+ # opts - A hash with options for the extraction (default: { multiword => true } ).
227
+ # :multiword - A boolean to indicate if multiple words tags are to be extracted.
228
+ # with_separator - A Boolean specifying if the tags are to be returned with or without separators (default: false).
229
+ #
230
+ # Returns an Array of tags : ["#tag1", "#[long tag]", "#tag2"] or ["tag1", "long tag", "tag2"].
231
+ def extract_tags(separator = nil, container = nil, opts = { multiword: true }, with_separator = false)
53
232
  if with_separator
54
- TagExtractor::StringExtractor.new(self).extract_with_separator(separator)
233
+ TagExtractor::StringExtractor.new(self).extract_with_separator(separator, container, opts)
55
234
  else
56
- TagExtractor::StringExtractor.new(self).extract(separator)
235
+ TagExtractor::StringExtractor.new(self).extract(separator, container, opts)
57
236
  end
58
237
  end
59
238
 
60
- def convert_tags_to_html_links(separator = nil, &block)
61
- TagExtractor::HTMLExtractor.new(self).convert_tags_to_html_links(separator, &block)
239
+ # Public: Native String helper for TagExtractor::HTMLExtractor#convert_tags_to_html_links.
240
+ # See API for TagExtractor::HTMLExtractor#convert_tags_to_html_links
241
+ #
242
+ # Returns an HTML String.
243
+ def convert_tags_to_html_links(separator = nil, container = nil, opts = { multiword: true }, &block)
244
+ TagExtractor::HTMLExtractor.new(self).convert_tags_to_html_links(separator, container, opts, &block)
62
245
  end
63
246
  alias :linkify_tags :convert_tags_to_html_links
64
247
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tag-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-04 00:00:00.000000000 Z
12
+ date: 2012-11-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Allow tag extraction and tag conversion in ruby
15
15
  email: dehan.gabriel@gmail.com
@@ -38,7 +38,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
38
  version: '0'
39
39
  requirements: []
40
40
  rubyforge_project:
41
- rubygems_version: 1.8.24
41
+ rubygems_version: 1.8.15
42
42
  signing_key:
43
43
  specification_version: 3
44
44
  summary: A minimal ruby library for tag extraction