scrapzirra 0.0.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.travis.yml +3 -0
- data/.watchr +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +19 -0
- data/Rakefile +6 -0
- data/lib/scrapzirra.rb +7 -0
- data/lib/scrapzirra/feed_utilities.rb +13 -0
- data/lib/scrapzirra/injector.rb +11 -0
- data/lib/scrapzirra/scrap.rb +28 -0
- data/lib/scrapzirra/version.rb +3 -0
- data/scrapzirra.gemspec +22 -0
- data/spec/sample_feeds/AmazonWebServicesBlog.xml +796 -0
- data/spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml +63 -0
- data/spec/sample_feeds/swartz.html +1828 -0
- data/spec/scrapzirra/injector_spec.rb +30 -0
- data/spec/scrapzirra/scrap_spec.rb +21 -0
- data/spec/spec_helper.rb +28 -0
- metadata +103 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. spec_helper])
|
2
|
+
|
3
|
+
describe Scrapzirra::Injector do
|
4
|
+
let(:feed) do
|
5
|
+
Feedzirra::Parser::Atom.parse(sample_atom_feed)
|
6
|
+
end
|
7
|
+
|
8
|
+
let(:injector) { Scrapzirra::Injector.new feed}
|
9
|
+
|
10
|
+
let(:injected_feed) { injector.feed }
|
11
|
+
|
12
|
+
subject { injected_feed }
|
13
|
+
|
14
|
+
its(:title) { should eq("Amazon Web Services Blog")}
|
15
|
+
its(:url) { should eq("http://aws.typepad.com/aws/")}
|
16
|
+
|
17
|
+
describe '.scrap' do
|
18
|
+
its(:"scrap.class") { should be(Scrapzirra::Scrap)}
|
19
|
+
describe '.doc' do
|
20
|
+
subject { injected_feed.scrap.doc }
|
21
|
+
its(:class){ should be(Nokogiri::HTML::Document)}
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '.entries' do
|
26
|
+
subject { feed.entries.first }
|
27
|
+
its(:url) { should eq("http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html")}
|
28
|
+
its(:"scrap.class") { should be(Scrapzirra::Scrap)}
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. spec_helper])
|
2
|
+
|
3
|
+
describe Scrapzirra::Scrap do
|
4
|
+
let(:scrap) { Scrapzirra::Scrap.new nil, doc: open("#{File.dirname(__FILE__)}/../sample_feeds/swartz.html")}
|
5
|
+
|
6
|
+
describe '.doc' do
|
7
|
+
subject { scrap.doc }
|
8
|
+
its(:class){ should be(Nokogiri::HTML::Document)}
|
9
|
+
|
10
|
+
describe '.css' do
|
11
|
+
subject { scrap.doc.css('article header h1').collect{|a| a.content }.inject(:+) }
|
12
|
+
it { should match(/Aaron Swartz/) }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe '.selector_content' do
|
17
|
+
subject { scrap.selector_content('article header h1')}
|
18
|
+
it { should match(/dziecko internetu/) }
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
begin
|
2
|
+
require 'simplecov'
|
3
|
+
SimpleCov.start do
|
4
|
+
add_filter "/spec/"
|
5
|
+
end
|
6
|
+
rescue LoadError
|
7
|
+
end
|
8
|
+
|
9
|
+
require File.expand_path(File.dirname(__FILE__) + '/../lib/scrapzirra')
|
10
|
+
require 'feedzirra'
|
11
|
+
|
12
|
+
def load_sample(filename)
|
13
|
+
File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
|
14
|
+
end
|
15
|
+
|
16
|
+
def sample_atom_feed
|
17
|
+
load_sample("AmazonWebServicesBlog.xml")
|
18
|
+
end
|
19
|
+
|
20
|
+
def sample_atom_entry_content
|
21
|
+
load_sample("AmazonWebServicesBlogFirstEntryContent.xml")
|
22
|
+
end
|
23
|
+
|
24
|
+
RSpec.configure do |config|
|
25
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
26
|
+
config.run_all_when_everything_filtered = true
|
27
|
+
config.filter_run :focus
|
28
|
+
end
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scrapzirra
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.2
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tomasz Tokarski
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-27 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
type: :runtime
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ! '>='
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '0'
|
29
|
+
name: feedzirra
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
type: :development
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
name: rspec
|
46
|
+
description: Additional script parsing tools for feedzirra
|
47
|
+
email:
|
48
|
+
- tomasz@tomasztokarski.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- .gitignore
|
54
|
+
- .travis.yml
|
55
|
+
- .watchr
|
56
|
+
- Gemfile
|
57
|
+
- LICENSE.txt
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- lib/scrapzirra.rb
|
61
|
+
- lib/scrapzirra/feed_utilities.rb
|
62
|
+
- lib/scrapzirra/injector.rb
|
63
|
+
- lib/scrapzirra/scrap.rb
|
64
|
+
- lib/scrapzirra/version.rb
|
65
|
+
- scrapzirra.gemspec
|
66
|
+
- spec/sample_feeds/AmazonWebServicesBlog.xml
|
67
|
+
- spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml
|
68
|
+
- spec/sample_feeds/swartz.html
|
69
|
+
- spec/scrapzirra/injector_spec.rb
|
70
|
+
- spec/scrapzirra/scrap_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
homepage: https://github.com/tiokksar/scrapzirra
|
73
|
+
licenses: []
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirements: []
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.8.24
|
93
|
+
signing_key:
|
94
|
+
specification_version: 3
|
95
|
+
summary: Additional script parsing tools for feedzirra
|
96
|
+
test_files:
|
97
|
+
- spec/sample_feeds/AmazonWebServicesBlog.xml
|
98
|
+
- spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml
|
99
|
+
- spec/sample_feeds/swartz.html
|
100
|
+
- spec/scrapzirra/injector_spec.rb
|
101
|
+
- spec/scrapzirra/scrap_spec.rb
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
has_rdoc:
|