tag-extractor 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/tag_extractor.rb +202 -19
  2. metadata +3 -3
@@ -1,39 +1,206 @@
1
+ # Public: TagExtractor module contains various classes to handle tag extraction and manipulation.
2
+ # The class uses the principles of separator and containers as a way to separate tags from the
3
+ # rest of the string.
4
+ #
5
+ # Examples
6
+ #
7
+ # "#social, economy, #physics, #[web development]"
8
+ # # Here we have 3 tags : social, physics, web development.
9
+ # # '#' is the tag separator and [] are the containers,
10
+ # # needed only when tags are composed of multiple words.
11
+ #
12
+ # author - Gabriel Dehan (https://github.com/gabriel-dehan)
13
+ # documentation - https://github.com/gabriel-dehan/TagExtractor
14
+ # version - 1.0.0
15
+ #
16
+ # ______________
17
+ # < Tag Extractor >
18
+ # --------------
19
+ # \ ^__^
20
+ # \(00)\_______
21
+ # (__)\ RUBY )\/\
22
+ # ## ||----w |
23
+ # || ||
24
+ #
25
+ # A minimal ruby library for tag extraction and manipulation.
26
+
1
27
  module TagExtractor
2
- @@separator = nil
28
+ # Public: Constant to be passed to TagExtractor subclasses methods,
29
+ # allowing you to default to the Global Separator you have set through tag_separator=(separator)
30
+ GLOBAL_SEPARATOR = nil
3
31
 
4
- def self.tag_separator=(s)
5
- @@separator = s
6
- end
32
+ @@separator = GLOBAL_SEPARATOR
33
+
34
+ # Public : Constant set with a default separator, namely a sharp symbol (#)
35
+ DEFAULT_SEPARATOR = '#'
36
+
37
+ # Public : Constant set with the default container, namely square brackets ([])
38
+ @@default_container = DEFAULT_CONTAINER = '[]'
39
+
40
+ @@container = @@default_container
41
+
42
+ class << self
43
+ # Public: Sets the String tag separator.
44
+ def tag_separator=(s)
45
+ @@separator = s
46
+ end
47
+
48
+ # Public: Returns the String tag separator.
49
+ def tag_separator
50
+ @@separator || raise(TagSeparatorError)
51
+ end
52
+
53
+ # Public: Sets the String multi-words tag container.
54
+ def words_container=(c)
55
+ @@container = c
56
+ end
57
+ alias :multiwords_container= :words_container=
7
58
 
8
- def self.tag_separator
9
- @@separator || raise(TagSeparatorError)
59
+ # Public: Returns the String multi-words tag container.
60
+ def words_container
61
+ @@container || @@default_container
62
+ end
63
+ alias :multiwords_container :words_container
10
64
  end
11
65
 
66
+ # Public: TagExtractor::StringExtractor class, allows tag extraction from a String.
12
67
  class StringExtractor
68
+ # Public: Returns the original String.
13
69
  attr_reader :source
14
70
 
71
+ # Public: Initialize a StringExtractor.
72
+ #
73
+ # source - A String from which to extract the tags.
15
74
  def initialize(source)
16
75
  @source = source
17
76
  end
18
77
 
19
- def extract_with_separator(separator = nil)
20
- @source.scan(get_regex(separator))
78
+ # Public: Extract tags, along with their separators, from the source.
79
+ #
80
+ # separator - a separator to use for tag extraction.
81
+ # If none specified, it will default to the global separator.
82
+ # container - a container to use for tag extraction.
83
+ # If none specified, it will default to the default container.
84
+ # opts - A hash with options for the extraction (default: { multiword => true } ).
85
+ # :multiword - A boolean to indicate if multiple words tags are to be extracted.
86
+ #
87
+ # Returns an Array of tags with separators : ["#tag1", "#[long tag]", "#tag2"]
88
+ def extract_with_separator(separator = nil, container = nil, opts = { multiword: true })
89
+ @source.scan(get_regex(separator, container, opts[:multiword]))
21
90
  end
22
91
 
23
- def extract(separator = nil)
24
- extract_with_separator(separator).collect { |t| t.slice!(0); t }
92
+ # Public: Extract tags, removing their separators.
93
+ #
94
+ # separator - A String separator to use for tag extraction.
95
+ # If none specified, it will default to the global separator.
96
+ # container - A String container to use for tag extraction.
97
+ # If none specified, it will default to the default container.
98
+ # opts - A Hash with options for the extraction (default: { multiword => true } ).
99
+ # :multiword - A Boolean to indicate if multiple words tags are to be extracted.
100
+ #
101
+ # Returns an Array of tags without separators : ["tag1", "long tag", "tag2"]
102
+ def extract(separator = nil, container = nil, opts = { multiword: true })
103
+ tags = extract_with_separator(separator, container, opts)
104
+ remove_separators_in(tags, container: container)
25
105
  end
26
106
 
27
107
  private
28
- def get_regex(separator = nil)
108
+ # Private: provides the regexp used for scanning a tagful string.
109
+ #
110
+ # separator - The String separator used for tag extraction.
111
+ # container - The String container used for tag extraction.
112
+ # multiword - A Boolean to indicate if multiple words tags are to be extracted.
113
+ #
114
+ # Returns a Regexp.
115
+ def get_regex(separator, container, multiword)
116
+ # We get the default separator & containers if none were specified
29
117
  tag_separator = separator || TagExtractor::tag_separator
30
- %r{(?:#{tag_separator})[a-zA-Z](?:\w|-)*}
118
+ tag_container = container || TagExtractor::words_container
119
+
120
+ # Transforms the container string into an array like ['[', ']'].
121
+ left, right = container_array(tag_container)
122
+
123
+ # Word matching regex for simple and multiple words.
124
+ mono_word = '(?:[a-zA-Z](?:\w|-)*)'
125
+ multi_words = '(?:[a-zA-Z](?:\w|-|\s)*)'
126
+
127
+ # Escapes everything.
128
+ left, right, tag_separator = [left, right, tag_separator].map { |s| Regexp::escape(s) }
129
+
130
+ if multiword
131
+ %r(#{tag_separator}(?:#{mono_word}|(?:#{left}{1}#{multi_words}#{right}{1})))
132
+ else
133
+ %r(#{tag_separator}(?:#{mono_word}))
134
+ end
135
+ end
136
+
137
+ # Private: Remove tags separators and containers from a list of tags.
138
+ #
139
+ # tags - An Array of tags.
140
+ # opts - A Hash of options (default: { container => nil }).
141
+ # :container - A String to specify the container from which to extract multiple words tags.
142
+ # If none specified, it will default to the Default or Global words container.
143
+ #
144
+ # Returns an Array of cleaned tags.
145
+ def remove_separators_in tags, opts = { container: nil }
146
+ tag_container = opts[:container] || TagExtractor::words_container
147
+ tags.collect { |t| t.slice!(0); remove_tags_container(t, tag_container) }
148
+ end
149
+
150
+ # Private: Remove tags container from a tag.
151
+ #
152
+ # t - The tag, as a String.
153
+ # c - the container, as a String.
154
+ #
155
+ # Returns the cleaned tag.
156
+ def remove_tags_container(t, c)
157
+ l, r = container_array(c)
158
+ t.gsub!(l, '')
159
+ t.gsub!(r, '')
160
+ t
161
+ end
162
+
163
+ # Private: Transforms the container string into an array.
164
+ #
165
+ # c - the container's String.
166
+ #
167
+ # Examples
168
+ #
169
+ # container_array '[]' # => ['[',']']
170
+ #
171
+ # Returns an Array of two strings.
172
+ def container_array(c)
173
+ c = c || TagExtractor::words_container
174
+ c = c.split ''
31
175
  end
32
176
  end # StringExtractor
33
177
 
178
+ # Public: A class holding methods to handle tags extraction and manipulation from HTML Strings.
179
+ # Inherits from StringExtractor.
34
180
  class HTMLExtractor < StringExtractor
35
- def convert_tags_to_html_links(separator = nil, options = { class: nil }, &block)
36
- @source.gsub!(get_regex(separator)) { |name|
181
+ # Public: Add links around all tags in an HTML String.
182
+ #
183
+ # separator - A specific separator, as a String. If none specified, it defaults to the global separator.
184
+ # container - A specific container, as a String. If none specified, it defaults to the default or global container.
185
+ # options - A Hash of options for the link extraction (default: { class => nil }).
186
+ # :class - A String css class to add to the <a> link tag.
187
+ # :multiword - A Boolean to indicate if multiple words tags are to be extracted.
188
+ # block - A Block used to specify a link dynamically. It is passed the cleaned tag string and it should return a String to be injected in the href attribute.
189
+ #
190
+ # Examples
191
+ #
192
+ # # Considering the following string has been used for instantiation :
193
+ # # 'This is a string with #tag1, #tag2'
194
+ # html_extractor.convert_tags_to_html_links('#', :class => 'tag tag-link') do |tag_string|
195
+ # "/tag/#{tag_string.downcase}"
196
+ # end
197
+ # # => 'This is a string with <a class="tag tag-link" href="/tag/tag1">#tag1</a>, <a class="tag tag-link" href="/tag/tag2">#tag2</a>'
198
+ #
199
+ # Returns an HTML String.
200
+ def convert_tags_to_html_links(separator = nil, container = nil, options = { class: nil }, &block)
201
+ multi = options[:multiword] || true
202
+ @source.gsub!(get_regex(separator, container, multi)) { |name|
203
+ name = remove_tags_container(name, container)
37
204
  link = block.call(name.slice(1..-1)) || ''
38
205
  '<a ' + (options[:class].nil? ? '' : 'class="' + options[:class] + '" ') + 'href="' + link + '">' + name + '</a>'
39
206
  }
@@ -41,6 +208,7 @@ module TagExtractor
41
208
  alias :linkify_tags :convert_tags_to_html_links
42
209
  end
43
210
 
211
+ # Private : TagExtractor specific Error and Exceptions.
44
212
  class TagSeparatorError < StandardError
45
213
  def initialize
46
214
  super "Could not find any tag separator"
@@ -49,16 +217,31 @@ module TagExtractor
49
217
  end
50
218
 
51
219
  class String
52
- def extract_tags(separator = nil, with_separator = false)
220
+ # Public: Native String helper for TagExtractor::StringExtractor#extract and #extract_with_separator.
221
+ #
222
+ # separator - a separator to use for tag extraction.
223
+ # If none specified, it will default to the global separator.
224
+ # container - a container to use for tag extraction.
225
+ # If none specified, it will default to the default container.
226
+ # opts - A hash with options for the extraction (default: { multiword => true } ).
227
+ # :multiword - A boolean to indicate if multiple words tags are to be extracted.
228
+ # with_separator - A Boolean specifying if the tags are to be returned with or without separators (default: false).
229
+ #
230
+ # Returns an Array of tags : ["#tag1", "#[long tag]", "#tag2"] or ["tag1", "long tag", "tag2"].
231
+ def extract_tags(separator = nil, container = nil, opts = { multiword: true }, with_separator = false)
53
232
  if with_separator
54
- TagExtractor::StringExtractor.new(self).extract_with_separator(separator)
233
+ TagExtractor::StringExtractor.new(self).extract_with_separator(separator, container, opts)
55
234
  else
56
- TagExtractor::StringExtractor.new(self).extract(separator)
235
+ TagExtractor::StringExtractor.new(self).extract(separator, container, opts)
57
236
  end
58
237
  end
59
238
 
60
- def convert_tags_to_html_links(separator = nil, &block)
61
- TagExtractor::HTMLExtractor.new(self).convert_tags_to_html_links(separator, &block)
239
+ # Public: Native String helper for TagExtractor::HTMLExtractor#convert_tags_to_html_links.
240
+ # See API for TagExtractor::HTMLExtractor#convert_tags_to_html_links
241
+ #
242
+ # Returns an HTML String.
243
+ def convert_tags_to_html_links(separator = nil, container = nil, opts = { multiword: true }, &block)
244
+ TagExtractor::HTMLExtractor.new(self).convert_tags_to_html_links(separator, container, opts, &block)
62
245
  end
63
246
  alias :linkify_tags :convert_tags_to_html_links
64
247
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tag-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-04 00:00:00.000000000 Z
12
+ date: 2012-11-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Allow tag extraction and tag conversion in ruby
15
15
  email: dehan.gabriel@gmail.com
@@ -38,7 +38,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
38
  version: '0'
39
39
  requirements: []
40
40
  rubyforge_project:
41
- rubygems_version: 1.8.24
41
+ rubygems_version: 1.8.15
42
42
  signing_key:
43
43
  specification_version: 3
44
44
  summary: A minimal ruby library for tag extraction