celes-web 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+ .idea
15
+
16
+ # YARD artifacts
17
+ .yardoc
18
+ _yardoc
19
+ doc/
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use --create 1.9.3@celes
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in celes.gemspec
4
+ gemspec
5
+
6
+ gem 'rspec'
7
+ gem 'nokogiri'
@@ -0,0 +1,32 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ celes-web (1.0.0)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ diff-lcs (1.2.5)
11
+ mini_portile (0.5.2)
12
+ nokogiri (1.6.0)
13
+ mini_portile (~> 0.5.0)
14
+ rake (10.1.0)
15
+ rspec (2.14.1)
16
+ rspec-core (~> 2.14.0)
17
+ rspec-expectations (~> 2.14.0)
18
+ rspec-mocks (~> 2.14.0)
19
+ rspec-core (2.14.7)
20
+ rspec-expectations (2.14.4)
21
+ diff-lcs (>= 1.1.3, < 2.0)
22
+ rspec-mocks (2.14.4)
23
+
24
+ PLATFORMS
25
+ ruby
26
+
27
+ DEPENDENCIES
28
+ bundler (~> 1.3)
29
+ celes-web!
30
+ nokogiri
31
+ rake
32
+ rspec
@@ -0,0 +1,44 @@
1
+ Celes
2
+ =====
3
+
4
+ A simple Ruby gem for parsing snippets of text and images based on the HTML content at a provided URL.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'celes-web'
11
+
12
+ And then execute:
13
+
14
+ $ bundle install
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install 'celes-web'
19
+
20
+ ## Usage
21
+
22
+ Require 'celes' and then construct a Celes object providing the URL to parse
23
+
24
+ require 'celes'
25
+ c = Celes.new(url: 'http://www.yahoo.com')
26
+
27
+ With the default options, '#snippets' returns the text snippets that are at least 40 characters long, each truncated to at most 140 characters
28
+
29
+ c.snippets #=> ['Array', 'of', 'Strings']
30
+
31
+ Calling '#images' returns the 'src' attribute of every image found in the page body
32
+
33
+ c.images #=> ['Array', 'of', 'Strings']
34
+
35
+ ### Options
36
+
37
+ Use 'min_snippet_length' to define the minimum length of text in the document node to be considered a "snippet". The default is 40 characters.
38
+
39
+ c = Celes.new(url: 'http://www.yahoo.com', min_snippet_length: 100)
40
+
41
+ Use 'snip_length' to truncate longer snippets down to a shorter length. By default, Celes will truncate snippets to 140 characters.
42
+
43
+ c = Celes.new(url: 'http://www.yahoo.com', snip_length: 45)
44
+
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for celes-web.
#
# Puts ./lib on the load path, then declares the gem's metadata, packaged
# files and dependencies.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "celes-web"
  spec.version       = '1.0.0'
  spec.authors       = ["Andy Schrage"]
  spec.email         = ["ajschrag@mtu.edu"]
  spec.description   = %q{A simple Ruby gem for providing a snippet of text and images based on a url.}
  spec.summary       = %q{A simple Ruby gem for providing a snippet of text and thumbnails based on a url.}
  spec.homepage      = "https://github.com/Swimminschrage/celes"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks, so the gem must be built
  # from inside a git checkout.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Tooling needed only to develop and test the gem.
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency 'rspec'

  # Runtime dependency: lib/celes.rb parses the fetched HTML with Nokogiri.
  spec.add_dependency 'nokogiri'
end
@@ -0,0 +1,71 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'nokogiri'
4
+
5
# Extracts text snippets and image sources from the HTML document at a URL.
#
# The page is fetched lazily, on the first call to #snippets or #images, and
# the parsed results are cached on the instance afterwards.
#
# Recognised options (all passed to .new in a single Hash):
#   :url                - required; the address of the page to fetch.
#   :snip_length        - maximum length of a returned snippet (default 140).
#   :min_snippet_length - minimum stripped length for a text node to count as
#                         a snippet (default 40).
# Negative lengths are normalised to 0.
class Celes
  # Snippets are truncated to this many characters unless :snip_length is given.
  DEFAULT_SNIP_LENGTH = 140

  # Text nodes shorter than this are skipped unless :min_snippet_length is given.
  DEFAULT_MIN_SNIPPET_LENGTH = 40

  # Upper bound on redirects followed for one fetch, so that a redirect loop
  # fails fast instead of recursing forever.
  MAX_REDIRECTS = 10

  # The URL of the page. After a fetch that was redirected this is the
  # absolute URL of the redirect target.
  attr_reader :url

  # options - Hash of options, see the class documentation.
  #
  # Raises RuntimeError when no :url is given.
  def initialize(options = {})
    raise 'No URL provided' unless options[:url]

    @url = options[:url]
    @uri = URI(@url)

    parse_options(options)
  end

  # Returns an Array of Strings: the text of the p/span text nodes in the page
  # body that meet the minimum length, each truncated to the snippet length.
  #
  # Raises RuntimeError when the page cannot be fetched.
  def snippets
    make_request unless @snippets
    @snippets
  end

  # Returns an Array of Strings: the src attribute of each img in the page body.
  #
  # Raises RuntimeError when the page cannot be fetched.
  def images
    make_request unless @images
    @images
  end

  private

  # Stores the length options, applying defaults and clamping negatives to 0.
  def parse_options(options)
    @snip_length        = [options[:snip_length] || DEFAULT_SNIP_LENGTH, 0].max
    @min_snippet_length = [options[:min_snippet_length] || DEFAULT_MIN_SNIPPET_LENGTH, 0].max
  end

  # Fetches the page and parses it, populating @images and @snippets.
  #
  # Raises RuntimeError when no document body could be obtained.
  def make_request
    body = get_html_content
    raise "Unable to reach #{@url}" unless body

    parse_html(body)
  end

  # Fetches the document at @uri, following redirects.
  #
  # redirects_left - how many more redirects may be followed; callers normally
  #                  omit it.
  #
  # Returns the response body String on success. Returns nil for any other
  # response, including a redirect-class response that carries no Location
  # header (for example 304 Not Modified).
  # Raises RuntimeError once more than MAX_REDIRECTS redirects were followed.
  def get_html_content(redirects_left = MAX_REDIRECTS)
    resp = Net::HTTP.get_response(@uri)

    case resp
    when Net::HTTPSuccess
      resp.body
    when Net::HTTPRedirection
      location = resp['location']
      return nil unless location
      raise "Too many redirects while fetching #{@url}" if redirects_left <= 0

      # Location may be relative ("/path"); resolve it against the URI that
      # was just requested so the next request still has a host.
      @uri = @uri.merge(location)
      @url = @uri.to_s
      get_html_content(redirects_left - 1)
    end
  end

  # Parses an HTML String and caches the image sources and text snippets.
  def parse_html(response)
    doc = Nokogiri::HTML.parse(response)

    @images = doc.xpath('/html/body//img/@src').map(&:value)

    texts = doc.xpath('/html/body//*[self::p or self::span]/text()').map(&:to_s)
    # The length test ignores surrounding whitespace, but the snippet itself
    # is kept unstripped.
    long_enough = texts.select { |text| text.strip.length >= @min_snippet_length }
    @snippets = long_enough.map { |text| text[0, @snip_length] }
  end
end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+ require_relative '../lib/celes'
3
+ require 'uri'
4
+
5
# Specs for Celes. Every example that needs a document stubs the fetch, so the
# suite runs without network access.
describe Celes do
  let(:url) { 'http://www.theredheadproject.com' }
  let(:celes) { Celes.new(url: url) }
  subject { celes }

  it { should respond_to(:images) }
  it { should respond_to(:snippets) }
  it { should respond_to(:url) }

  it 'should error out when provided no url' do
    expect { Celes.new }.to raise_error(RuntimeError)
  end

  describe 'when provided a valid url' do
    its(:url) { should be == url }

    context 'that causes a redirect' do
      let(:url) { 'http://google.com' }
      let(:target) { 'http://www.google.com/' }
      let(:page) { '<html><body></body></html>' }

      # Canned responses: a 302 pointing at the target, then a 200 serving the page.
      let(:redirect) do
        Net::HTTPFound.new('1.1', '302', 'Found').tap { |resp| resp['location'] = target }
      end
      let(:success) do
        Net::HTTPOK.new('1.1', '200', 'OK').tap { |resp| resp.stub(:body).and_return(page) }
      end

      before { Net::HTTP.stub(:get_response).and_return(redirect, success) }

      it 'follows the redirect and returns the final document' do
        # get_html_content is private, hence the send.
        expect(subject.send(:get_html_content)).to eq page
      end

      it 'reports the redirect target as its url' do
        subject.send(:get_html_content)
        expect(subject.url).to eq target
      end
    end

    describe 'that contains images and text snippets' do
      before do
        subject.stub(:get_html_content) { '<html><body><p>This is a test for some text that should be long enough</p><img src="myimg.png" width="100" height="100"/></body></html>' }
      end

      it 'should have only 1 image' do
        expect(subject.images).to be_an Array
        expect(subject.images.size).to eq 1
        expect(subject.images[0]).to eq 'myimg.png'
      end

      it 'should have only 1 snippet' do
        expect(subject.snippets).to be_an Array
        expect(subject.snippets.size).to eq 1
        expect(subject.snippets[0]).to eq 'This is a test for some text that should be long enough'
      end
    end

    # Marking these tests as pending until I can figure out how to get Nokogiri to parse html that has img and p tags
    # in the head.
    describe 'only parses images and text snippets within the body' do
      before do
        subject.stub(:get_html_content).and_return '<html><head><p>This is a test for some text that should be long enough</p><img src="myimg.png" width="100" height="100"/></head><body><p>Body Text</p></body></html>'
      end

      xit 'should have no images' do
        expect(subject.images).to be_an Array
        expect(subject.images.size).to eq 0
      end

      xit 'should have no snippets' do
        expect(subject.snippets).to be_an Array
        expect(subject.snippets.size).to eq 0
      end
    end

    describe 'can take options' do
      context ':min_snippet_length' do
        # One 17-character paragraph and one 40-character paragraph.
        let(:two_paragraphs) { '<p>Not 40 characters</p><p>Definitely 40 characters long test here!</p>' }

        it 'defaults to 40 characters' do
          c = Celes.new(url: url)
          c.stub(:get_html_content).and_return two_paragraphs

          expect(c.snippets.size).to eq 1
          expect(c.snippets[0]).to eq 'Definitely 40 characters long test here!'
        end

        it 'less than 0 defaults to 0' do
          c = Celes.new(url: url, min_snippet_length: -10)
          c.stub(:get_html_content).and_return two_paragraphs

          expect(c.snippets.size).to eq 2
          expect(c.snippets[0]).to eq 'Not 40 characters'
          expect(c.snippets[1]).to eq 'Definitely 40 characters long test here!'
        end

        it 'can be set to any int 0 or greater' do
          c = Celes.new(url: url, min_snippet_length: 17)
          c.stub(:get_html_content).and_return two_paragraphs

          expect(c.snippets.size).to eq 2
          expect(c.snippets[0]).to eq 'Not 40 characters'
          expect(c.snippets[1]).to eq 'Definitely 40 characters long test here!'
        end

        it 'can be set to 0' do
          c = Celes.new(url: url, min_snippet_length: 0)
          c.stub(:get_html_content).and_return "<p>A</p>#{two_paragraphs}"

          expect(c.snippets.size).to eq 3
          expect(c.snippets[0]).to eq 'A'
        end
      end

      context ':snip_length' do
        let(:text59) { 'C' * 59 }
        let(:text140) { 'A' * 140 }
        let(:text150) { 'B' * 150 }

        before do
          Celes.any_instance.stub(:get_html_content).and_return "<p>#{text59}</p><p>#{text140}</p><p>#{text150}</p>"
        end

        it 'defaults to 140 characters' do
          c = Celes.new(url: url)

          expect(c.snippets.size).to eq 3
          expect(c.snippets[0]).to eq text59
          expect(c.snippets[1]).to eq text140
          expect(c.snippets[2]).to eq text150[0, 140]
        end

        it 'defaults to 0 when a negative value is provided' do
          c = Celes.new(url: url, snip_length: -100)

          expect(c.snippets.size).to eq 3
          expect(c.snippets[0]).to be_empty
          expect(c.snippets[1]).to be_empty
        end

        it 'can be set to be any value greater than 0' do
          c = Celes.new(url: url, snip_length: 60)

          expect(c.snippets.size).to eq 3
          expect(c.snippets[0]).to eq text59
          expect(c.snippets[1]).to eq text140[0, 60]
          expect(c.snippets[2]).to eq text150[0, 60]
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
# Suite-wide RSpec settings (generated by `rspec --init`).
RSpec.configure do |config|
  # Going by its name, lets a bare symbol tag (such as :focus) stand for the
  # metadata pair `symbol => true`.
  # NOTE(review): confirm against the RSpec::Core::Configuration docs.
  config.treat_symbols_as_metadata_keys_with_true_values = true
  # Presumably falls back to running every example when the filter below
  # matches nothing — confirm against the RSpec docs.
  config.run_all_when_everything_filtered = true
  # Filter the run on the :focus tag.
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: celes-web
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andy Schrage
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-12-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: nokogiri
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A simple Ruby gem for providing a snipet of text and images based on
79
+ a url.
80
+ email:
81
+ - ajschrag@mtu.edu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - .gitignore
87
+ - .rvmrc
88
+ - Gemfile
89
+ - Gemfile.lock
90
+ - README.md
91
+ - celes.gemspec
92
+ - lib/celes.rb
93
+ - spec/celes_spec.rb
94
+ - spec/spec_helper.rb
95
+ homepage: https://github.com/Swimminschrage/celes
96
+ licenses:
97
+ - MIT
98
+ post_install_message:
99
+ rdoc_options: []
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 1.8.25
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: A simple Ruby gem for providing a snipet of text and thumbnails based on
120
+ a url.
121
+ test_files:
122
+ - spec/celes_spec.rb
123
+ - spec/spec_helper.rb