arrogance 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http endpoint allows tampering in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in arrogance.gemspec.
gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/arrogance.gemspec ADDED
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
#-- Gem specification for arrogance. The version number is read from
#-- lib/arrogance/version.rb, so lib/ is put on the load path first.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "arrogance/version"

Gem::Specification.new do |spec|
  #-- identity
  spec.name        = "arrogance"
  spec.version     = Arrogance::VERSION
  spec.authors     = ["Gideon Providence"]
  spec.email       = ["jprovidence@live.ca"]
  spec.homepage    = ""
  spec.summary     = %q{A tool to easily manage RSS feeds of all kinds}
  spec.description = %q{Easily manage RSS and Atom feeds of all kinds.}

  spec.rubyforge_project = "arrogance"

  #-- runtime dependencies (further ones would be declared here with
  #-- add_runtime_dependency / add_development_dependency)
  spec.add_dependency 'nokogiri'

  #-- packaged files are whatever git currently tracks
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,81 @@
1
module Arrogance

  #-- Wraps the posts of one feed (an array of hashes, newest first) together
  #-- with a feed-level data object answering feed_title, site_link and
  #-- feed_image. Posts can be fetched by ordinal name, either through #get or
  #-- through dynamic methods such as `first` or `first_and_seventh`.
  class BlogObject
    #-- Ordinal name => zero-based index into the post array.
    POSITIONS = {:first   => 0,
                 :second  => 1,
                 :third   => 2,
                 :fourth  => 3,
                 :fifth   => 4,
                 :sixth   => 5,
                 :seventh => 6,
                 :eighth  => 7,
                 :ninth   => 8,
                 :tenth   => 9}.freeze

    attr_accessor :aoh, :data, :positions

    #-- @param array_of_hashes [Array<Hash>] one hash per post
    #-- @param data [#feed_title, #site_link, #feed_image] feed-level details
    def initialize(array_of_hashes, data)
      @aoh, @data = array_of_hashes, data
      #-- each instance gets its own copy because :positions is writable
      @positions = POSITIONS.dup
    end

    #-- @return [Array<Hash>] the raw array of post hashes
    def guts
      @aoh
    end

    #-- @return [Hash, nil] the first post in the array
    def most_recent
      @aoh.first
    end

    #-- @return [Hash, nil] the last post in the array
    def least_recent
      @aoh.last
    end

    #-- Alias for #least_recent.
    def last
      least_recent
    end

    #-- The next three simply delegate to the data object.
    def feed_title
      @data.feed_title
    end

    def site_link
      @data.site_link
    end

    def feed_image
      @data.feed_image
    end

    #-- Fetch posts by ordinal name.
    #-- @param args [Array<String, Symbol>] one or more names (:first .. :tenth)
    #-- @return [Hash, nil] the post when one name is given
    #-- @return [Array<Hash, nil>] the posts, in order, when several are given
    #-- @raise [ArgumentError] when no name is given or a name is unknown
    def get(*args)
      raise ArgumentError, 'expected at least one position name' if args.empty?
      found = args.map { |pos| @aoh[index_for(pos)] }
      args.length > 1 ? found : found.first
    end

    #-- Resolves calls such as `third` or `first_and_ninth`; anything that is
    #-- not made purely of known position names goes to the default handler.
    def method_missing(meth, *args, &block)
      name = meth.to_s
      return super unless contains_positions(name)
      parts = name.split('_and_')
      parts.length > 1 ? parts.map { |part| get(part) } : get(name)
    end

    #-- Keeps respond_to? in agreement with method_missing.
    def respond_to_missing?(meth, include_private = false)
      contains_positions(meth.to_s) || super
    end

    #-- internal methods. I would usually make these private, but I was once the recipient of a rant
    #-- on how private methods are Ruby's version of Satan. I shall refrain from marking them private.

    #-- @return [Boolean] true when str is one or more known position names
    #--   joined by '_and_' (empty segments such as 'first_and_' do not count)
    def contains_positions(str)
      names = str.to_s.split('_and_', -1) # -1 keeps trailing empty segments
      return false if names.empty?
      known = (@positions || {}).keys.map { |key| key.to_s }
      names.all? { |name| known.include?(name) }
    end

    #-- Translate a position name into an array index.
    def index_for(pos)
      @positions.fetch(pos.to_s.to_sym) do
        raise ArgumentError, "unknown position: #{pos.inspect}"
      end
    end

  end

end
@@ -0,0 +1,311 @@
1
+ module Arrogance
2
+
3
+
4
+ #-- This class uses a bunch of hand coded rules to establish which tag is which.
5
+ #-- You may think that this would be super easy, but a lot of folks use unconventional
6
+ #-- tag names, and there are even variations on typical tags:
7
+ #-- Eg:
8
+ #-- <link>www.link.com</link>
9
+ #-- VS:
10
+ #-- <link www.link.com /> <-- if you do this, please stop. It makes life less delightful
11
+
12
class TagHandler

  #-- clutter
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  attr_accessor :url, :date, :title, :author, :description, :link

  #-- First tag-looking span ("<...>") in a piece of serialized markup.
  FIRST_TAG = /<.*?>/
  #-- Short-text tags that must never be taken for the author tag.
  NOT_AUTHOR_TAGS = ['title', 'total', 'category', 'comments', 'updated', 'id'].freeze
  #-- Tags that may carry URLs but are not the post link.
  NOT_LINK_TAGS = ['id', 'author'].freeze
  #-- Markup that mentions a URL (http, https or a www. host).
  LINKISH = /https?:|www\./
  #-- "Date"/"date" not preceded by u, U, p, P, '(', ')' or '|', so that
  #-- "pubDate" and "date" match while "updated" does not.
  DATE_TAG = /[^()|uUpP][Dd]ate/

  #-- Add mode method to Enumerable. Don't you love ruby? :)
  #-- Returns the most frequent element, or nil for an empty collection.
  Enumerable.class_eval do
    def mode
      biggest = group_by { |e| e }.values.max_by { |group| group.size }
      biggest.nil? ? nil : biggest.first
    end
  end

  #-- @param url the feed location; it is fetched immediately
  def initialize(url)
    @url = url
    discover_tags
  end

  #-- Fetches the feed and works out which tag names hold each post field.
  #-- RSS <item> nodes are used when present, Atom <entry> nodes otherwise.
  def discover_tags
    doc = load_document
    items = doc.xpath('//item')
    set = items.empty? ? doc.xpath('//entry') : items
    discover_author_tag(set, doc)
    discover_link_tag(set)
    discover_description_tag(set)
    discover_title_tag
    discover_date_tag(set)
  end

  #-- get author_tag
  #-- Picks the most common child tag whose text is non-empty and shorter
  #-- than 20 characters, ignoring NOT_AUTHOR_TAGS. When no child qualifies,
  #-- falls back to '//author//name' if the document has one, else 'none'.
  def discover_author_tag(set, doc)
    candidates = []
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        opening = first_tag(markup)
        text = markup.gsub(FIRST_TAG, '')
        next if opening.empty? || text.empty? || text.length >= 20
        name = tag_name(opening)
        candidates << name unless name.nil? || NOT_AUTHOR_TAGS.include?(name)
      end
    end
    #-- decided once, after every node was scanned, so that an empty feed
    #-- still ends up with a value instead of nil
    if candidates.empty?
      self.author = doc.xpath('//author//name').empty? ? 'none' : '//author//name'
    else
      self.author = candidates.mode.to_s.gsub(/(<|>)/, '')
    end
  end

  #-- get link tag
  #-- Collects the opening tag of every child that mentions a URL (except
  #-- <content> and NOT_LINK_TAGS). 'origLink' wins over 'link'; otherwise
  #-- the most common tag is used, or 'manual' when that tag has attributes.
  #-- Sets '' when nothing looks like a link.
  def discover_link_tag(set)
    seen = []
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        opening = first_tag(markup)
        next if opening.empty? || tag_name(opening) == 'content'
        next unless markup =~ LINKISH
        seen << opening unless NOT_LINK_TAGS.include?(opening.gsub(/(<|>)/, ''))
      end
    end
    if seen.empty?
      self.link = ''
    elsif seen.include?('<origLink>')
      self.link = 'origLink'
    elsif seen.include?('<link>')
      self.link = 'link'
    else
      common = seen.mode.to_s
      self.link = (common.split(' ').length > 1 ? 'manual' : common.gsub(/(<|>)/, ''))
    end
  end

  #-- get description tag
  #-- For each node, votes for the opening tag with the largest accumulated
  #-- markup length (totals keep growing across nodes); the most voted tag's
  #-- name becomes the description tag. nil when there is nothing to vote on.
  def discover_description_tag(set)
    votes, weight = [], {}
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        key = first_tag(markup)
        weight[key] = (weight[key] || 0) + markup.length
      end
      heaviest = weight.max_by { |_tag, size| size }
      votes << (heaviest ? heaviest[0] : '')
    end
    self.description = tag_name(votes.mode)
  end

  #-- get title tag, its always title.
  def discover_title_tag
    return self.title = 'title'
  end


  #-- get date tag
  #-- Uses the most common short (< 20 chars) opening tag matching DATE_TAG,
  #-- defaulting to 'updated'.
  def discover_date_tag(set)
    found = []
    set.each do |node|
      node.children.each do |child|
        opening = first_tag(child.to_s)
        #-- stored as a String so that equal tags are grouped by #mode
        found << opening if opening.length < 20 && opening =~ DATE_TAG
      end
    end
    name = found.empty? ? 'updated' : tag_name(found.mode).to_s
    self.date = (name.length > 1 ? name : 'updated')
    return self.date
  end

  private

  #-- Fetch and parse the feed with namespaces removed. The block form
  #-- closes the handle; URI.open is needed on Ruby >= 3.0, where
  #-- Kernel#open no longer fetches URLs.
  def load_document
    parse = lambda { |io| Nokogiri::XML(io).remove_namespaces! }
    if URI.respond_to?(:open)
      URI.open(self.url, &parse)
    else
      open(self.url, &parse)
    end
  end

  #-- First "<...>" span of the markup, or '' when there is none.
  def first_tag(markup)
    FIRST_TAG.match(markup).to_s
  end

  #-- Tag name without brackets or attributes; nil for blank input.
  def tag_name(tag)
    tag.to_s.gsub(/(<|>)/, '').strip.split(' ')[0]
  end

end
153
+
154
+
155
+ #-- This gathers information on the feed, such as the site link and feed image
156
class DataHandler
  require 'open-uri'
  require 'rubygems'
  require 'nokogiri'
  attr_accessor :url, :feed_title, :feed_image, :site_link

  #-- @param url the feed location; it is fetched immediately
  def initialize(url)
    self.url = url
    discover_data
  end

  #-- Fetches the feed once and fills feed_title, feed_image and site_link.
  def discover_data
    doc = load_document
    [:discover_feed_title, :discover_feed_image, :discover_site_link].each do |meth|
      self.send(meth, doc)
    end
  end

  #-- Sets feed_title to the text of the channel's own <title>, falling
  #-- back to the first <title> anywhere under <channel>; '' when absent.
  #-- Only one node is read, so item titles are never glued onto it.
  def discover_feed_title(doc)
    titles = doc.xpath('//channel/title')
    titles = doc.xpath('//channel//title') if titles.empty?
    self.feed_title = titles.empty? ? '' : titles[0].inner_text.to_s
  end

  #-- Sets feed_image to the text of //channel//image//url ('' when absent).
  #-- The original author disabled a fallback that read the <image> tag
  #-- itself because it caused a seg-fault; it has not been reinstated.
  def discover_feed_image(doc)
    self.feed_image = doc.xpath('//channel//image//url').inner_text.to_s
  end

  #-- Sets site_link to the first non-blank <link> text that is a direct
  #-- child of <channel>; <link> elements deeper in the channel are only
  #-- consulted when the channel has no direct <link> at all. '' when
  #-- nothing is found.
  def discover_site_link(doc)
    nodes = doc.xpath('//channel/link')
    nodes = doc.xpath('//channel//link') if nodes.empty?
    texts = nodes.map { |node| node.inner_text.to_s.strip }
    self.site_link = texts.find { |text| !text.empty? } || ''
  end

  private

  #-- Fetch and parse the feed with namespaces removed. The block form
  #-- closes the handle; URI.open is needed on Ruby >= 3.0, where
  #-- Kernel#open no longer fetches URLs.
  def load_document
    parse = lambda { |io| Nokogiri::XML(io).remove_namespaces! }
    if URI.respond_to?(:open)
      URI.open(self.url, &parse)
    else
      open(self.url, &parse)
    end
  end
end
211
+
212
#-- Builds one hash per post (:title, :author, :description, :date, :link)
#-- using the tag names discovered by a tag set (see #build_post).
class PostHandler
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  attr_accessor :url, :posts

  #-- Keys every post hash is guaranteed to have (missing ones become '').
  POST_FIELDS = [:link, :date, :description, :author, :title].freeze
  #-- yyyy-mm-dd
  ISO_DATE = /[0-9]{4}-[0-9]{2}-[0-9]{2}/

  #-- @param url the feed location; it is fetched immediately
  #-- @param tag_set object answering title, author, description, date, link
  def initialize(url, tag_set)
    self.url = url
    init(tag_set)
  end

  #-- Parses the feed and stores the built posts in #posts, <entry> nodes
  #-- first, then <item> nodes.
  def init(tag_set)
    doc, built = load_document, []
    ['//entry', '//item'].each do |path|
      doc.xpath(path).each_with_index do |node, idx|
        built << build_post(node, idx, tag_set)
      end
    end
    self.posts = built
  end

  #-- @param item the <item>/<entry> node
  #-- @param idx position of the node among its siblings of the same kind
  #-- @param tag_set see #initialize; a nil tag name skips that field
  #-- @return [Hash] post with all POST_FIELDS present as strings
  def build_post(item, idx, tag_set)
    post = {}
    post[:title]       = text_at(item, tag_set.title)            unless tag_set.title.nil?
    post[:author]      = author_for(item, idx, tag_set.author)   unless tag_set.author.nil?
    post[:description] = text_at(item, tag_set.description)      unless tag_set.description.nil?
    post[:date]        = date_for(item, tag_set.date)            unless tag_set.date.nil?
    post[:link]        = link_for(item, tag_set.link)            unless tag_set.link.nil?
    POST_FIELDS.each { |field| post[field] ||= '' }
    return post
  end

  private

  #-- Text found at the given xpath/tag name; '' for a blank tag name
  #-- (an empty xpath expression would raise).
  def text_at(item, tag)
    return '' if tag.nil? || tag.to_s.empty?
    item.xpath(tag).inner_text.to_s
  end

  #-- 'Unknown' when no author tag was discovered. For the document-wide
  #-- '//author//name' path the idx-th match is used unless there is
  #-- exactly one match.
  def author_for(item, idx, tag)
    return 'Unknown' if tag == 'none'
    return '' if tag.to_s.empty?
    matches = item.xpath(tag)
    if tag == '//author//name' && matches.length != 1
      node = matches[idx]
      return node ? node.inner_text.to_s : ''
    end
    matches.inner_text.to_s
  end

  #-- Text of the date tag. When that is empty and the <id> markup contains
  #-- a yyyy-mm-dd date, the text between the first ',' and the following
  #-- ':' of the id is used (or the matched date when that span is empty).
  def date_for(item, tag)
    date = text_at(item, tag)
    return date unless date.empty?
    id_markup = item.xpath('id').to_s
    stamp = ISO_DATE.match(id_markup)
    return '' unless stamp
    segment = /,.*?:/.match(id_markup).to_s.gsub(/(,|:)/, '')
    segment.empty? ? stamp.to_s : segment
  end

  #-- 'manual' means the link lives in attributes of <link> elements;
  #-- otherwise the tag text is used, with an html/alternate <link> as
  #-- fallback when that text is empty.
  def link_for(item, tag)
    return manual_link(item) if tag == 'manual'
    found = text_at(item, tag)
    found.empty? ? alternate_html_link(item) : found
  end

  #-- href of the first <link> that has a rel attribute plus either a type
  #-- attribute or an 'alternate' rel, and an http(s) href; failing that,
  #-- the href of the first <link> that has one. '' when there is none.
  def manual_link(item)
    links = item.xpath('link')
    preferred = links.find do |link|
      link['rel'] &&
        (link['type'] || link['rel'].include?('alternate')) &&
        link['href'].to_s =~ /\Ahttp/
    end
    chosen = preferred || links.find { |link| link['href'] }
    chosen ? chosen['href'].to_s : ''
  end

  #-- href of the first <link type="text/html" rel="alternate">, or ''.
  def alternate_html_link(item)
    match = item.xpath('link').find do |link|
      link['type'] == 'text/html' && link['rel'] == 'alternate'
    end
    match ? match['href'].to_s : ''
  end

  #-- Fetch and parse the feed with namespaces removed. The block form
  #-- closes the handle; URI.open is needed on Ruby >= 3.0, where
  #-- Kernel#open no longer fetches URLs.
  def load_document
    parse = lambda { |io| Nokogiri::XML(io).remove_namespaces! }
    if URI.respond_to?(:open)
      URI.open(self.url, &parse)
    else
      open(self.url, &parse)
    end
  end
end
311
+ end
@@ -0,0 +1,3 @@
1
module Arrogance
  #-- Release version of the gem (read by arrogance.gemspec). Frozen so the
  #-- constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,15 @@
1
module Arrogance

  #-- Turns a feed url into a BlogObject by running the three handlers.
  class Wrangler

    class << self
      #-- @param url the feed location; it is handed to each handler in turn
      #-- @return [BlogObject] built from PostHandler#posts and the DataHandler
      def beat_into_submission(url)
        tags  = TagHandler.new(url)
        data  = DataHandler.new(url)
        posts = PostHandler.new(url, tags)
        BlogObject.new(posts.posts, data)
      end
    end
  end

end
data/lib/arrogance.rb ADDED
@@ -0,0 +1,64 @@
1
+ require "arrogance/version"
2
+ require "arrogance/wrangler"
3
+ require "arrogance/tools"
4
+ require "arrogance/objects"
5
+
6
module Arrogance
  class Lasso

    #-- @param urls -> A list of urls. This param can take one of two forms:
    #--     1: Hash structured as follows:
    #--         {:term => [Array, of, urls],
    #--          :barack_obamas_second_term => [Another, array, of, urls]
    #--          :etc ...
    #--         }
    #--     2: Array:
    #--         [Array, of, urls, that, can, be, as, long, as, you, want]

    #-- Returns: In either case, the return will be of the same structure.
    #--          However, any array(s) of urls passed in will be replaced (non-destructive)
    #--          with array(s) of blogobjects:
    #--             {:term => [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]}
    #--             OR
    #--             [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]
    #-- Raises: ArgumentError when urls is neither a Hash nor an Array.

    #NOTE: The returned BlogObject(s) can be manipulated as follows
    #my_blog_object.most_recent
    #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11...', :etc => '...'} <-- That's a hash, treat it like one
    #-- You can retrieve individual posts by stating their position in reverse chronological order (newest first)
    #my_blog_object.first
    #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11 12:34:25', :etc => '...'}
    #my_blog_object.first_and_seventh
    #-- => [{:title => 'Blogger stating opinions', :yada => 'yada'}, {:title => 'Look at ma kittehs!!1!one', ...}]
    #-- feel free to get ridiculous:
    #my_blog_object.first_and_second_and_fourth_and_ninth
    #-- Arrogance will return a max of 10
    #-- Additionally, you can retrieve the feed title, site link and feed_image (if they exist) by calling
    #feed_title, #site_link and #feed_image methods of the BlogObject
    #-- The guts of a BlogObject is really just an array of hashes. If you just want to deal with that yourself
    #-- and not take advantage of all the cool methods I made for you, that's fine. Just call my_blog_object.guts
    #-- and you can get that array of hashes to play with.

    class << self
      #-- Entry point; see the notes above for the accepted shapes.
      def read(urls)
        case urls
        when Hash
          build_a_hash_response(urls)
        when Array
          build_an_array_response(urls)
        else
          #-- fail loudly instead of handing back nil
          raise ArgumentError, "expected a Hash or an Array of urls, got #{urls.class}"
        end
      end

      #-- @param urls [Hash] key => collection of urls
      #-- @return [Hash] a new hash, same keys, each collection mapped to BlogObjects
      def build_a_hash_response(urls)
        arrogance = {}
        urls.each do |term, list|
          arrogance[term] = list.collect { |url| Wrangler.beat_into_submission(url) }
        end
        arrogance
      end

      #-- @param urls [Array] urls
      #-- @return [Array] one BlogObject per url, in the same order
      def build_an_array_response(urls)
        urls.collect { |url| Wrangler.beat_into_submission(url) }
      end

      #-- The original (misspelled) name is kept for existing callers.
      alias_method :build_an_array_reponse, :build_an_array_response
    end
  end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: arrogance
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Gideon Providence
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-08-17 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ description: Easily manage RSS and Atom feeds of all kinds.
27
+ email:
28
+ - jprovidence@live.ca
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - .gitignore
37
+ - Gemfile
38
+ - Rakefile
39
+ - arrogance.gemspec
40
+ - lib/arrogance.rb
41
+ - lib/arrogance/objects.rb
42
+ - lib/arrogance/tools.rb
43
+ - lib/arrogance/version.rb
44
+ - lib/arrogance/wrangler.rb
45
+ homepage: ""
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options: []
50
+
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project: arrogance
68
+ rubygems_version: 1.8.7
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: A tool to easily manage RSS feeds of all kinds
72
+ test_files: []
73
+