celes-web 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+ .idea
15
+
16
+ # YARD artifacts
17
+ .yardoc
18
+ _yardoc
19
+ doc/
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use --create 1.9.3@celes
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in celes.gemspec
4
+ gemspec
5
+
6
+ gem 'rspec'
7
+ gem 'nokogiri'
@@ -0,0 +1,32 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ celes-web (1.0.0)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ diff-lcs (1.2.5)
11
+ mini_portile (0.5.2)
12
+ nokogiri (1.6.0)
13
+ mini_portile (~> 0.5.0)
14
+ rake (10.1.0)
15
+ rspec (2.14.1)
16
+ rspec-core (~> 2.14.0)
17
+ rspec-expectations (~> 2.14.0)
18
+ rspec-mocks (~> 2.14.0)
19
+ rspec-core (2.14.7)
20
+ rspec-expectations (2.14.4)
21
+ diff-lcs (>= 1.1.3, < 2.0)
22
+ rspec-mocks (2.14.4)
23
+
24
+ PLATFORMS
25
+ ruby
26
+
27
+ DEPENDENCIES
28
+ bundler (~> 1.3)
29
+ celes-web!
30
+ nokogiri
31
+ rake
32
+ rspec
@@ -0,0 +1,44 @@
1
+ Celes
2
+ =====
3
+
4
+ A simple Ruby gem for parsing snippets of text and images based on the HTML content at a provided URL.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'celes-web'
11
+
12
+ And then execute:
13
+
14
+ $ bundle install
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install 'celes-web'
19
+
20
+ ## Usage
21
+
22
+ Require 'celes' and then construct a Celes object, providing the URL to parse:
23
+
24
+ require 'celes'
25
+ c = Celes.new(url: 'http://www.yahoo.com')
26
+
27
+ Calling '#snippets' on an object constructed without any options returns the text snippets that are at least 40 characters long, each truncated to 140 characters:
28
+
29
+ c.snippets #=> ['Array', 'of', 'Strings']
30
+
31
+ Calling '#images' returns the 'src' attribute of every image found in the page body:
32
+
33
+ c.images #=> ['Array', 'of', 'Strings']
34
+
35
+ ### Options
36
+
37
+ Use 'min_snippet_length' to define the minimum length of text in the document node to be considered a "snippet". The default is 40 characters.
38
+
39
+ c = Celes.new(url: 'http://www.yahoo.com', min_snippet_length: 100)
40
+
41
+ Use 'snip_length' to truncate longer snippets down to a shorter length. By default, Celes will truncate snippets to 140 characters.
42
+
43
+ c = Celes.new(url: 'http://www.yahoo.com', snip_length: 45)
44
+
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
# Gem specification for celes-web: package metadata, the files shipped in the
# gem, and its runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name          = "celes-web"
  spec.version       = '1.0.0'
  spec.authors       = ["Andy Schrage"]
  spec.email         = ["ajschrag@mtu.edu"]
  spec.description   = %q{A simple Ruby gem for providing a snippet of text and images based on a url.}
  spec.summary       = %q{A simple Ruby gem for providing a snippet of text and thumbnails based on a url.}
  spec.homepage      = "https://github.com/Swimminschrage/celes"
  spec.license       = "MIT"

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency 'rspec'
  # nokogiri is the only runtime dependency.
  spec.add_dependency 'nokogiri'
end
@@ -0,0 +1,71 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'nokogiri'
4
+
5
# Fetches the HTML document at a URL and extracts text snippets and image
# sources from its <body>.
#
# Options accepted by the constructor:
#   :url                - (required) address of the page to parse
#   :min_snippet_length - minimum stripped length for a text node to count as
#                         a snippet (default 40; negative values become 0)
#   :snip_length        - maximum length snippets are truncated to
#                         (default 140; negative values become 0)
#
# Example:
#   c = Celes.new(url: 'http://www.yahoo.com', snip_length: 45)
#   c.snippets #=> ['Array', 'of', 'Strings']
#   c.images   #=> ['Array', 'of', 'Strings']
class Celes
  # Upper bound on the redirects followed for a single fetch, so that a
  # redirect loop ends with an error instead of recursing without limit.
  MAX_REDIRECTS = 10

  # The URL being parsed. After a fetch that was redirected, this holds the
  # final location.
  attr_reader :url

  # Raises RuntimeError when no :url option is given.
  def initialize(options = {})
    raise 'No URL provided' unless options[:url]

    @url = options[:url]
    @uri = URI(@url)

    parse_options(options)
  end

  # Returns the Array of text snippets, fetching and parsing the page on
  # first use. Raises RuntimeError when the page cannot be fetched.
  def snippets
    make_request unless @snippets
    @snippets
  end

  # Returns the Array of image `src` values, fetching and parsing the page on
  # first use. Raises RuntimeError when the page cannot be fetched.
  def images
    make_request unless @images
    @images
  end

  private

  # Applies defaults for missing options and clamps negative lengths to 0.
  def parse_options(options)
    @snip_length = [options[:snip_length] || 140, 0].max
    @min_snippet_length = [options[:min_snippet_length] || 40, 0].max
  end

  # Fetches the page and populates @images and @snippets.
  def make_request
    html = get_html_content
    raise "Unable to reach #{@url}" unless html

    parse_html(html)
  end

  # Returns the response body for @uri, following up to MAX_REDIRECTS
  # redirects. Returns nil for any non-success, non-redirect response or for
  # a redirect that carries no Location header. Raises RuntimeError when the
  # redirect limit is exceeded.
  def get_html_content
    (MAX_REDIRECTS + 1).times do
      resp = Net::HTTP.get_response(@uri)
      case resp
      when Net::HTTPSuccess
        return resp.body
      when Net::HTTPRedirection
        location = resp['location']
        return nil unless location

        # Location may be relative; resolve it against the current URI so the
        # next request still has a scheme and host.
        @uri = @uri.merge(location)
        @url = @uri.to_s
      else
        return nil
      end
    end

    raise "Too many redirects while fetching #{@url}"
  end

  # Extracts image sources and text snippets from the HTML string.
  def parse_html(response)
    doc = Nokogiri::HTML.parse(response)

    @images = doc.xpath('/html/body//img/@src').map(&:value)

    # The length filter measures the stripped text, but the snippet itself
    # keeps its original whitespace before being truncated.
    texts = doc.xpath('/html/body//*[self::p or self::span]/text()').map(&:to_s)
    long_enough = texts.select { |text| text.strip.length >= @min_snippet_length }
    @snippets = long_enough.map { |text| text[0, @snip_length] }
  end
end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+ require_relative '../lib/celes'
3
+ require 'uri'
4
+
5
# Behavioural specs for Celes. Every example that fetches a page is fed canned
# responses through stubs, so the suite does not depend on network access.
describe Celes do
  let(:url) { 'http://www.theredheadproject.com' }
  let(:celes) { Celes.new(url: url) }
  subject { celes }

  it { should respond_to(:images) }
  it { should respond_to(:snippets) }
  it { should respond_to(:url) }

  it 'should error out when provided no url' do
    expect { Celes.new }.to raise_error(RuntimeError)
  end

  describe 'when provided a valid url' do
    its(:url) { should be == url }

    context 'that causes a redirect' do
      let(:url) { 'http://google.com' }
      let(:final_url) { 'http://www.google.com/' }
      let(:html) { '<html><body></body></html>' }

      # Real Net::HTTP response objects are required here: Celes dispatches on
      # the response class, which a plain double would not match.
      let(:redirect) do
        Net::HTTPFound.new('1.1', '302', 'Found').tap { |resp| resp['location'] = final_url }
      end
      let(:success) do
        Net::HTTPOK.new('1.1', '200', 'OK').tap { |resp| resp.stub(:body).and_return(html) }
      end

      # First request answers with the redirect, the second with the page.
      before { Net::HTTP.stub(:get_response).and_return(redirect, success) }

      it 'follows the redirect and returns the final page' do
        expect(subject.send(:get_html_content)).to eq html
        expect(subject.url).to eq final_url
      end
    end

    describe 'that contains images and text snippets' do
      before do
        subject.stub(:get_html_content) { '<html><body><p>This is a test for some text that should be long enough</p><img src="myimg.png" width="100" height="100"/></body></html>' }
      end

      it 'should have only 1 image' do
        expect(subject.images).to be_an Array
        expect(subject.images.size).to be 1
        expect(subject.images[0]).to be == 'myimg.png'
      end

      it 'should have only 1 snippet' do
        expect(subject.snippets).to be_an Array
        expect(subject.snippets.size).to be 1
        expect(subject.snippets[0]).to be == 'This is a test for some text that should be long enough'
      end
    end

    # These examples stay pending (xit) until Nokogiri can be made to parse
    # HTML that has img and p tags in the head.
    describe 'only parses images and text snippets within the body' do
      let(:nobody) { Celes.new(url: url) }
      subject { nobody }

      before do
        nobody.stub(:get_html_content).and_return '<html><head><p>This is a test for some text that should be long enough</p><img src="myimg.png" width="100" height="100"/></head><body><p>Body Text</p></body></html>'
      end

      xit 'should have no images' do
        expect(subject.images).to be_an Array
        expect(subject.images.size).to be 0
      end

      xit 'should have no snippets' do
        expect(subject.snippets).to be_an Array
        expect(subject.snippets.size).to be 0
      end
    end

    describe 'can take options' do
      context ':min_snippet_length' do
        it 'defaults to 40 characters' do
          c = Celes.new(url: url)
          c.stub(:get_html_content).and_return '<p>Not 40 characters</p><p>Definitely 40 characters long test here!</p>'

          expect(c.snippets.size).to be 1
          expect(c.snippets[0]).to eq 'Definitely 40 characters long test here!'
        end

        it 'less than 0 defaults to 0' do
          c = Celes.new(url: url, min_snippet_length: -10)
          c.stub(:get_html_content).and_return '<p>Not 40 characters</p><p>Definitely 40 characters long test here!</p>'

          expect(c.snippets.size).to be 2
          expect(c.snippets[0]).to eq 'Not 40 characters'
          expect(c.snippets[1]).to eq 'Definitely 40 characters long test here!'
        end

        it 'can be set to any int 0 or greater' do
          c = Celes.new(url: url, min_snippet_length: 17)
          c.stub(:get_html_content).and_return '<p>Not 40 characters</p><p>Definitely 40 characters long test here!</p>'

          expect(c.snippets.size).to be 2
          expect(c.snippets[0]).to eq 'Not 40 characters'
          expect(c.snippets[1]).to eq 'Definitely 40 characters long test here!'
        end

        it 'can be set to 0' do
          c = Celes.new(url: url, min_snippet_length: 0)
          c.stub(:get_html_content).and_return '<p>A</p><p>Not 40 characters</p><p>Definitely 40 characters long test here!</p>'

          expect(c.snippets.size).to be 3
          expect(c.snippets[0]).to eq 'A'
        end
      end

      context ':snip_length' do
        let(:text59) { 'C' * 59 }
        let(:text140) { 'A' * 140 }
        let(:text150) { 'B' * 150 }

        before { Celes.any_instance.stub(:get_html_content).and_return "<p>#{text59}</p><p>#{text140}</p><p>#{text150}</p>" }

        it 'defaults to 140 characters' do
          c = Celes.new(url: url)

          expect(c.snippets.size).to be 3
          expect(c.snippets[0]).to eq text59
          expect(c.snippets[1]).to eq text140
          expect(c.snippets[2]).to eq text150[0, 140]
        end

        it 'defaults to 0 when a negative value is provided' do
          c = Celes.new(url: url, snip_length: -100)

          expect(c.snippets.size).to be 3
          expect(c.snippets[0]).to be_empty
          expect(c.snippets[1]).to be_empty
        end

        it 'can be set to be any value greater than 0' do
          c = Celes.new(url: url, snip_length: 60)

          expect(c.snippets.size).to be 3
          expect(c.snippets[0]).to eq text59
          expect(c.snippets[1]).to eq text140[0, 60]
          expect(c.snippets[2]).to eq text150[0, 60]
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
# Runner settings shared by every spec file.
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true

  # Filter the run to :focus, with run_all_when_everything_filtered enabled
  # alongside it.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Random ordering surfaces order dependencies between examples. To debug
  # one, rerun with the seed printed after the run, e.g.
  #     --seed 1234
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: celes-web
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andy Schrage
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-12-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: nokogiri
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A simple Ruby gem for providing a snipet of text and images based on
79
+ a url.
80
+ email:
81
+ - ajschrag@mtu.edu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - .gitignore
87
+ - .rvmrc
88
+ - Gemfile
89
+ - Gemfile.lock
90
+ - README.md
91
+ - celes.gemspec
92
+ - lib/celes.rb
93
+ - spec/celes_spec.rb
94
+ - spec/spec_helper.rb
95
+ homepage: https://github.com/Swimminschrage/celes
96
+ licenses:
97
+ - MIT
98
+ post_install_message:
99
+ rdoc_options: []
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 1.8.25
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: A simple Ruby gem for providing a snipet of text and thumbnails based on
120
+ a url.
121
+ test_files:
122
+ - spec/celes_spec.rb
123
+ - spec/spec_helper.rb