arrogance 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http endpoint allows tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in arrogance.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/arrogance.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "arrogance/version"
4
+
5
# Gem packaging metadata for arrogance. The file lists come from whatever git
# currently tracks, so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  tracked_files  = `git ls-files`.split("\n")
  tracked_tests  = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binary = `git ls-files -- bin/*`.split("\n")

  # identity
  gem.name        = "arrogance"
  gem.version     = Arrogance::VERSION
  gem.authors     = ["Gideon Providence"]
  gem.email       = ["jprovidence@live.ca"]
  gem.homepage    = ""
  gem.summary     = %q{A tool to easily manage RSS feeds of all kinds}
  gem.description = %q{Easily manage RSS and Atom feeds of all kinds.}

  gem.rubyforge_project = "arrogance"

  # runtime dependencies
  gem.add_dependency 'nokogiri'

  # packaged content
  gem.files         = tracked_files
  gem.test_files    = tracked_tests
  gem.executables   = tracked_binary.map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # specify any further dependencies here; for example:
  # gem.add_development_dependency "rspec"
  # gem.add_runtime_dependency "rest-client"
end
@@ -0,0 +1,81 @@
1
+ module Arrogance
2
+
3
#-- Wraps the posts of one feed (an array of hashes) together with the feed-level
#-- data object, and lets callers pick posts by ordinal name:
#--   blog.first                => the hash at index 0
#--   blog.first_and_seventh    => [hash at index 0, hash at index 6]
#--   blog.get('second', :third) => [hash at index 1, hash at index 2]
class BlogObject
  attr_accessor :aoh, :data, :positions

  #-- @param array_of_hashes -> the posts; index 0 is what #most_recent returns
  #-- @param data -> any object responding to feed_title, site_link and feed_image
  def initialize(array_of_hashes, data)
    @aoh, @data = array_of_hashes, data
    #-- ordinal name => zero-based index into @aoh
    @positions = {:first   => 0,
                  :second  => 1,
                  :third   => 2,
                  :fourth  => 3,
                  :fifth   => 4,
                  :sixth   => 5,
                  :seventh => 6,
                  :eighth  => 7,
                  :ninth   => 8,
                  :tenth   => 9}
  end

  #-- The raw array of post hashes.
  def guts
    @aoh
  end

  #-- The post at the front of the array.
  def most_recent
    @aoh.first
  end

  #-- The post at the end of the array (nil when there are no posts).
  def least_recent
    @aoh.last
  end

  #-- Alias for #least_recent.
  def last
    least_recent
  end

  #-- Feed-level accessors, delegated to the data object.
  def feed_title
    @data.feed_title
  end

  def site_link
    @data.site_link
  end

  def feed_image
    @data.feed_image
  end

  #-- Look posts up by ordinal name (String or Symbol).
  #-- One name returns that single post; several names return an array of posts
  #-- in the order the names were given.
  def get(*args)
    return post_at(args[0]) unless args.length > 1
    args.collect { |pos| post_at(pos) }
  end

  #-- Turns calls such as #third or #first_and_ninth into #get lookups.
  #-- Anything that is not made purely of known ordinal names goes to super.
  def method_missing(meth, *args, &block)
    name = meth.to_s
    return super unless contains_positions(name)
    parts = name.split('_and_')
    parts.length > 1 ? parts.collect { |part| get(part) } : get(name)
  end

  #-- Keeps respond_to? truthful about the ordinal methods handled above.
  def respond_to_missing?(meth, include_private = false)
    contains_positions(meth.to_s) || super
  end

  #-- internal methods. I would usually make these private, but I was once the recipient of a rant
  #-- on how private methods are Ruby's version of Satan. I shall refrain from marking them private.

  #-- True when str is one or more known ordinal names joined by '_and_'.
  def contains_positions(str)
    known = @positions.keys.collect { |k| k.to_s }
    parts = str.split('_and_')
    #-- an empty split (e.g. '_and_') must not count as a match
    !parts.empty? && parts.all? { |part| known.include?(part) }
  end

  #-- The post stored at the index registered for the given ordinal name.
  def post_at(pos)
    @aoh[@positions[pos.to_s.intern]]
  end

end
80
+
81
+ end
@@ -0,0 +1,311 @@
1
+ module Arrogance
2
+
3
+
4
+ #-- This class uses a bunch of hand coded rules to establish which tag is which.
5
+ #-- You may think that this would be super easy, but alot of folks use unconventional
6
+ #-- tag names, and there are even variations on typical tags:
7
+ #-- Eg:
8
+ #-- <link>www.link.com</link>
9
+ #-- VS:
10
+ #-- <link www.link.com /> <-- if you do this, please stop. It makes life less delightful
11
+
12
#-- Inspects the entries of a feed and guesses which tag names hold the author,
#-- link, description, title and date of a post. The guesses are stored as
#-- strings (tag names / xpath expressions) in the matching accessors.
class TagHandler

  #-- clutter
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  attr_accessor :url, :date, :title, :author, :description, :link

  #-- Tags that hold short text but are never the author.
  NOT_AUTHOR_TAGS = ['title', 'total', 'category', 'comments', 'updated', 'id']
  #-- Tags that may contain a url but are never the post link.
  NOT_LINK_TAGS = ['id', 'author']

  #-- Add mode method to Enumerable. Don't you love ruby? :)
  #-- Returns the most frequent element, or nil for an empty collection.
  Enumerable.class_eval do
    def mode
      biggest = group_by { |e| e }.values.max_by { |group| group.size }
      biggest && biggest.first
    end
  end

  #-- @param url -> location of the feed; fetched immediately
  def initialize(url)
    @url = url
    discover_tags
  end

  #-- Fetch the feed and run every discover_* heuristic over its posts.
  #-- RSS <item> nodes win; Atom <entry> nodes are used when there are no items.
  def discover_tags
    doc = load_document
    items = doc.xpath('//item')
    set = (items.empty? ? doc.xpath('//entry') : items)
    discover_author_tag(set, doc)
    discover_link_tag(set)
    discover_description_tag(set)
    discover_title_tag
    discover_date_tag(set)
  end

  #-- Fetch and parse the feed with namespaces stripped.
  #-- Goes through URI#open (open-uri) rather than Kernel#open: Kernel#open stopped
  #-- handling urls in Ruby 3.0 and would run a "|command" string as a subprocess.
  #-- Anything that is not an openable uri is treated as a local file path.
  def load_document
    source = self.url.to_s
    uri = begin
      URI.parse(source)
    rescue URI::InvalidURIError
      nil
    end
    io = (uri && uri.respond_to?(:open)) ? uri.open : File.open(source)
    begin
      Nokogiri::XML(io).remove_namespaces!
    ensure
      io.close if io.respond_to?(:close) && !io.closed?
    end
  end

  #-- get author_tag
  #-- Candidates are child tags whose text is short (under 20 chars) and not mostly
  #-- digits; the most common candidate wins. Without candidates, fall back to the
  #-- document-wide '//author//name' path, or 'none' if that finds nothing.
  #-- The author is left untouched when the feed has no posts at all.
  def discover_author_tag(set, doc)
    candidates = []
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        tag = first_tag(markup)
        text = markup.gsub(/<.*?>/, '')
        next if tag.empty? || text.empty?
        next if mostly_digits?(text) || text.length >= 20
        name = tag_name(tag)
        candidates << name unless NOT_AUTHOR_TAGS.include?(name)
      end
    end
    return if set.empty?
    if !candidates.empty?
      self.author = candidates.mode.to_s.gsub(/(<|>)/, '')
    elsif !doc.xpath('//author//name')[0].to_s.empty?
      self.author = '//author//name'
    else
      self.author = 'none'
    end
  end

  #-- get link tag
  #-- Collects the opening tag of every child (other than <content>) whose markup
  #-- mentions a url. Preference order: <origLink>, then <link>, then the most common
  #-- tag. 'manual' means the winning tag carries attributes, so the url has to be
  #-- dug out of them later. '' means nothing was found.
  def discover_link_tag(set)
    found = []
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        tag = first_tag(markup)
        next if tag_name(tag) == 'content'
        next unless /https?:/.match(markup) || /www\./.match(markup)
        found << tag unless NOT_LINK_TAGS.include?(tag.gsub(/(<|>)/, ''))
      end
    end
    if found.empty?
      self.link = ''
    elsif found.include?('<origLink>')
      self.link = 'origLink'
    elsif found.include?('<link>')
      self.link = 'link'
    else
      common = found.mode.to_s
      self.link = (common.split(' ').length > 1 ? 'manual' : common.gsub(/(<|>)/, ''))
    end
  end

  #-- get description tag
  #-- The description is taken to be the tag holding the most markup. Sizes are
  #-- accumulated across posts; after each post the current leader gets one vote
  #-- and the tag with the most votes wins. Stays nil when there are no posts.
  def discover_description_tag(set)
    votes, sizes = [], {}
    set.each do |node|
      node.children.each do |child|
        markup = child.to_s
        tag = first_tag(markup)
        sizes[tag] = (sizes[tag] || 0) + markup.length
      end
      leader = sizes.max_by { |_tag, size| size }
      votes << (leader ? leader[0] : '')
    end
    self.description = votes.mode.to_s.gsub(/(<|>)/, '').strip.split(' ')[0]
  end

  #-- get title tag, its always title.
  def discover_title_tag
    self.title = 'title'
  end

  #-- get date tag
  #-- Looks for short opening tags containing "date"/"Date" that is not preceded by
  #-- one of ( ) | u U p P (which rules out "update..."). Falls back to 'updated'.
  def discover_date_tag(set)
    found = []
    set.each do |node|
      node.children.each do |child|
        tag = first_tag(child.to_s)
        #-- store plain strings so that equal tags are counted together by mode
        found << tag if /[^()|uUpP][Dd]ate/.match(tag) && tag.length < 20
      end
    end
    self.date = (found.empty? ? 'updated' : found.mode.to_s.gsub(/(<|>)/, ''))
    self.date = 'updated' unless self.date.length > 1
    self.date
  end

  #-- internal helpers

  #-- The first "<...>" in the markup, or '' when there is none.
  def first_tag(markup)
    /<.*?>/.match(markup).to_s
  end

  #-- The bare tag name of a "<name attr=...>" string (nil for '').
  def tag_name(tag)
    tag.gsub(/(<|>)/, '').strip.split(' ')[0]
  end

  #-- True when more than half of the characters are digits.
  def mostly_digits?(text)
    text.scan(/\d/).length * 2 > text.length
  end

end
153
+
154
+
155
+ #-- This gathers information on the feed, such as the site link and feed image
156
#-- Gathers feed-level information: the feed title, the feed image and the site link.
#-- All three are looked up below the RSS <channel> element and default to ''.
class DataHandler
  require 'open-uri'
  require 'rubygems'
  require 'nokogiri'
  attr_accessor :url, :feed_title, :feed_image, :site_link

  #-- @param url -> location of the feed; fetched immediately
  def initialize(url)
    self.url = url
    discover_data
  end

  #-- Fetch the feed once and fill in every accessor from it.
  def discover_data
    doc = load_document
    discover_feed_title(doc)
    discover_feed_image(doc)
    discover_site_link(doc)
  end

  #-- Fetch and parse the feed with namespaces stripped.
  #-- Goes through URI#open (open-uri) rather than Kernel#open: Kernel#open stopped
  #-- handling urls in Ruby 3.0 and would run a "|command" string as a subprocess.
  #-- Anything that is not an openable uri is treated as a local file path.
  def load_document
    source = self.url.to_s
    uri = begin
      URI.parse(source)
    rescue URI::InvalidURIError
      nil
    end
    io = (uri && uri.respond_to?(:open)) ? uri.open : File.open(source)
    begin
      Nokogiri::XML(io).remove_namespaces!
    ensure
      io.close if io.respond_to?(:close) && !io.closed?
    end
  end

  #-- The text of every <title> below <channel> is joined together; when that runs
  #-- past 50 characters, only the first <title> is used.
  def discover_feed_title(doc)
    titles = doc.xpath('//channel//title')
    joined = titles.inner_text.to_s
    if joined.empty?
      self.feed_title = ''
    elsif joined.length > 50
      self.feed_title = titles[0].inner_text
    else
      self.feed_title = joined
    end
  end

  #-- Uses the text of <channel><image><url>, or '' when there is none.
  #-- A fallback that read the url out of the <image> tag's attributes used to live
  #-- here; it was disabled because it caused a seg-fault.
  def discover_feed_image(doc)
    img = doc.xpath('//channel//image//url').inner_text
    self.feed_image = (img.empty? ? '' : img.to_s)
  end

  #-- The text of every <link> below <channel> is joined together. When that is 50
  #-- characters or longer it is cut after the first ".com"; if there is no ".com"
  #-- to cut at, the text is kept whole.
  def discover_site_link(doc)
    link = doc.xpath('//channel//link').inner_text.to_s
    if link.length > 49
      cut = /.*?\.com/.match(link)
      link = cut[0] if cut
    end
    self.site_link = link
  end
end
211
+
212
#-- Builds one hash per post of a feed, using the tag names discovered by a tag set
#-- (an object responding to title, author, description, date and link).
#-- Every post hash has the keys :title, :author, :description, :date and :link;
#-- values that cannot be found are ''.
class PostHandler
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  attr_accessor :url, :posts

  #-- @param url -> location of the feed; fetched immediately
  #-- @param tag_set -> the discovered tag names to read each post with
  def initialize(url, tag_set)
    self.url = url
    init(tag_set)
  end

  #-- Fetch the feed and build a post for every <entry>, then for every <item>.
  def init(tag_set)
    doc, built = load_document, []
    ['//entry', '//item'].each do |path|
      doc.xpath(path).each_with_index do |item, idx|
        built << build_post(item, idx, tag_set)
      end
    end
    self.posts = built
  end

  #-- Fetch and parse the feed with namespaces stripped.
  #-- Goes through URI#open (open-uri) rather than Kernel#open: Kernel#open stopped
  #-- handling urls in Ruby 3.0 and would run a "|command" string as a subprocess.
  #-- Anything that is not an openable uri is treated as a local file path.
  def load_document
    source = self.url.to_s
    uri = begin
      URI.parse(source)
    rescue URI::InvalidURIError
      nil
    end
    io = (uri && uri.respond_to?(:open)) ? uri.open : File.open(source)
    begin
      Nokogiri::XML(io).remove_namespaces!
    ensure
      io.close if io.respond_to?(:close) && !io.closed?
    end
  end

  #-- @param item -> the post node
  #-- @param idx -> position of the post among the nodes of its kind
  #-- @param tag_set -> discovered tag names; a nil name means "skip this field"
  #-- @return the post hash
  def build_post(item, idx, tag_set)
    post = {}
    post[:title] = item.xpath(tag_set.title).inner_text.to_s unless tag_set.title.nil?
    post[:author] = post_author(item, idx, tag_set.author) unless tag_set.author.nil?
    unless tag_set.description.nil?
      post[:description] = item.xpath(tag_set.description).inner_text.to_s
    end
    post[:date] = post_date(item, tag_set.date) unless tag_set.date.nil?
    post[:link] = post_link(item, tag_set.link) unless tag_set.link.nil?
    [:link, :date, :description, :author, :title].each { |key| post[key] ||= '' }
    post
  end

  #-- internal helpers

  #-- 'none' means no author tag was discovered, so the author is 'Unknown'.
  #-- '//author//name' is document-wide: a single hit is shared by all posts,
  #-- several hits are matched to posts by position.
  def post_author(item, idx, tag)
    return 'Unknown' if tag == 'none'
    names = item.xpath(tag)
    return names.inner_text.to_s unless tag == '//author//name' && names.length != 1
    name = names[idx]
    name ? name.inner_text.to_s : 'Unknown'
  end

  #-- The text of the date tag. When that is empty and the post's <id> contains a
  #-- yyyy-mm-dd date, whatever sits between the first ',' and the following ':'
  #-- of the id is used instead.
  def post_date(item, tag)
    date = item.xpath(tag).inner_text.to_s
    return date unless date.empty?
    id = item.xpath('id').to_s
    return date unless /[0-9]{4}-[0-9]{2}-[0-9]{2}/.match(id)
    /,.*?:/.match(id).to_s.gsub(/(,|:)/, '')
  end

  #-- The text of the link tag; for 'manual' (or when the text is empty) the url is
  #-- taken from the attributes of the post's <link> elements.
  def post_link(item, tag)
    return manual_link(item) if tag == 'manual'
    #-- an empty tag name means no link tag was discovered; do not query with it
    link = (tag.empty? ? '' : item.xpath(tag).inner_text.to_s)
    return link unless link.empty?
    item.xpath('link').each do |node|
      markup = node.to_s
      next unless /type=("|')text\/html("|')/.match(markup)
      next unless /rel=("|')alternate("|')/.match(markup)
      return href_of(markup)
    end
    link
  end

  #-- Prefer the first <link> with a rel attribute that also has a type (or is an
  #-- alternate with an href) and take its first quoted http value; otherwise use
  #-- the href of the first <link> that has one.
  def manual_link(item)
    links = item.xpath('link').collect { |node| node.to_s }
    found = ''
    links.each do |markup|
      next unless /rel=/.match(markup)
      if /type=/.match(markup) || (/href=/.match(markup) && /alternate/.match(markup))
        found = /("|')http.*?("|')/.match(markup).to_s.gsub(/('|")/, '')
        break
      end
    end
    return found unless found.empty?
    links.each do |markup|
      return href_of(markup) if /href=/.match(markup)
    end
    ''
  end

  #-- The value of the href attribute in a tag's markup, without quotes ('' if absent).
  def href_of(markup)
    found = /href=("|')(.*?)\1/.match(markup)
    found ? found[2] : ''
  end
end
311
+ end
@@ -0,0 +1,3 @@
1
module Arrogance
  #-- Gem version; read by arrogance.gemspec when the gem is built.
  VERSION = '0.0.1'
end
@@ -0,0 +1,15 @@
1
+ module Arrogance
2
+
3
#-- Glue: runs the three handlers over one feed url and packs the result.
class Wrangler

  #-- @param url -> location of a single feed
  #-- @return a BlogObject built from the feed's posts and its feed-level data
  def self.beat_into_submission(url)
    tag_set   = TagHandler.new(url)
    feed_data = DataHandler.new(url)
    post_list = PostHandler.new(url, tag_set).posts
    BlogObject.new(post_list, feed_data)
  end
end
14
+
15
+ end
data/lib/arrogance.rb ADDED
@@ -0,0 +1,64 @@
1
+ require "arrogance/version"
2
+ require "arrogance/wrangler"
3
+ require "arrogance/tools"
4
+ require "arrogance/objects"
5
+
6
+ module Arrogance
7
#-- Public entry point of the gem.
class Lasso

  #-- Lasso.read(urls)
  #--
  #-- @param urls -> the feeds to read, in one of two shapes:
  #--   1: a Hash of url arrays:
  #--        {:term => [Array, of, urls],
  #--         :another_term => [Another, array, of, urls]}
  #--   2: a plain Array of urls, as long as you like:
  #--        [Array, of, urls]
  #--
  #-- Returns: the same shape that was passed in, with every url replaced by a
  #-- BlogObject. The input itself is not modified:
  #--        {:term => [BlogObject, BlogObject, BlogObject]}
  #--      OR
  #--        [BlogObject, BlogObject, BlogObject]
  #-- Any other kind of argument returns nil.
  #--
  #-- Working with a returned BlogObject:
  #--   my_blog_object.most_recent
  #--     => {:title => 'Blogger stating opinions!', :date => '2011-11-11...', ...}
  #--     That is a plain hash; treat it like one.
  #--   Individual posts are retrieved by their position in reverse chronological
  #--   order (newest first):
  #--   my_blog_object.first
  #--     => {:title => 'Blogger stating opinions!', :date => '2011-11-11 12:34:25', ...}
  #--   my_blog_object.first_and_seventh
  #--     => [{:title => 'Blogger stating opinions', ...}, {:title => 'Look at ma kittehs!!1!one', ...}]
  #--   Feel free to get ridiculous:
  #--   my_blog_object.first_and_second_and_fourth_and_ninth
  #--   Positions go up to tenth, so at most 10 posts are addressable this way.
  #--   The feed title, site link and feed image (if they exist) are available through
  #--   the #feed_title, #site_link and #feed_image methods of the BlogObject.
  #--   The guts of a BlogObject is really just an array of hashes. To work with that
  #--   directly instead of the helper methods, call my_blog_object.guts.

  def self.read(urls)
    case urls
    when Hash  then build_a_hash_response(urls)
    when Array then build_an_array_reponse(urls)
    end
  end

  #-- Hash in, Hash out: same keys, each url array replaced by BlogObjects.
  def self.build_a_hash_response(urls)
    pairs = urls.map do |term, list|
      [term, list.map { |url| Wrangler.beat_into_submission(url) }]
    end
    Hash[pairs]
  end

  #-- Array in, Array out: one BlogObject per url, in the same order.
  def self.build_an_array_reponse(urls)
    urls.map { |url| Wrangler.beat_into_submission(url) }
  end
end
64
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: arrogance
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Gideon Providence
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-08-17 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ description: Easily manage RSS and Atom feeds of all kinds.
27
+ email:
28
+ - jprovidence@live.ca
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - .gitignore
37
+ - Gemfile
38
+ - Rakefile
39
+ - arrogance.gemspec
40
+ - lib/arrogance.rb
41
+ - lib/arrogance/objects.rb
42
+ - lib/arrogance/tools.rb
43
+ - lib/arrogance/version.rb
44
+ - lib/arrogance/wrangler.rb
45
+ homepage: ""
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options: []
50
+
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project: arrogance
68
+ rubygems_version: 1.8.7
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: A tool to easily manage RSS feeds of all kinds
72
+ test_files: []
73
+