flatfish 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Rakefile +0 -12
- data/example/bio.csv +32 -0
- data/example/config.yml +7 -2
- data/flatfish.gemspec +1 -1
- data/lib/flatfish.rb +1 -1
- data/lib/flatfish/page.rb +1 -1
- metadata +3 -2
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -9,15 +9,3 @@ Rake::TestTask.new("test_units") do |t|
|
|
9
9
|
t.verbose = false
|
10
10
|
t.warning = true
|
11
11
|
end
|
12
|
-
|
13
|
-
RUBY='1.9.3'
|
14
|
-
|
15
|
-
desc "Build gem"
|
16
|
-
task :build_gem do
|
17
|
-
system "rvm #{RUBY} do gem build flatfish.gemspec"
|
18
|
-
end
|
19
|
-
|
20
|
-
desc "Install gem"
|
21
|
-
task :install_gem => :build_gem do
|
22
|
-
system "sudo rvm #{RUBY} do gem install flatfish-*.gem"
|
23
|
-
end
|
data/example/bio.csv
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
"url","path","title","body","field_position"
|
2
|
+
"http://drupalconnect.com/team/john-florez","bio/john-florez","John Florez",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
3
|
+
"http://drupalconnect.com/team/jonathon-whitener","bio/jonathon-whitener","Jonathon Whitener",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
4
|
+
"http://drupalconnect.com/team/tim-loudon","bio/tim-loudon","Tim Loudon",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
5
|
+
"http://drupalconnect.com/team/mike-crittenden","bio/mike-crittenden","Mike Crittenden",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
6
|
+
"http://drupalconnect.com/team/christopher-jones","bio/christopher-jones","Christopher Jones",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
7
|
+
"http://drupalconnect.com/team/chris-akeley","bio/chris-akeley","Chris Akeley",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
8
|
+
"http://drupalconnect.com/team/kim-murphy","bio/kim-murphy","Kim Murphy",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
9
|
+
"http://drupalconnect.com/team/bill-minor","bio/bill-minor","Bill Minor",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
10
|
+
"http://drupalconnect.com/team/chris-boag","bio/chris-boag","Chris Boag",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
11
|
+
"http://drupalconnect.com/team/steve-edwards","bio/steve-edwards","Steve Edwards",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
12
|
+
"http://drupalconnect.com/team/roger-soper","bio/roger-soper","Roger Soper",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
13
|
+
"http://drupalconnect.com/team/chad-hester","bio/chad-hester","Chad Hester",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
14
|
+
"http://drupalconnect.com/team/heshan-wanigasooriya","bio/heshan-wanigasooriya","Heshan Wanigasooriya",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
15
|
+
"http://drupalconnect.com/team/edward-zwart","bio/ed-zwart","Ed Zwart",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
16
|
+
"http://drupalconnect.com/team/kathy-chavez","bio/kathy-chavez","Kathy Chavez",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
17
|
+
"http://drupalconnect.com/team/felipe-fidelix","bio/felipe-fidelix","Felipe Fidelix",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
18
|
+
"http://drupalconnect.com/team/jonas-flint","bio/jonas-flint","Jonas Flint",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
19
|
+
"http://drupalconnect.com/team/hally-turner","bio/hally-turner","Hally Turner",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
20
|
+
"http://drupalconnect.com/team/katelyn-cushman","bio/kate-cushman","Kate Cushman",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
21
|
+
"http://drupalconnect.com/team/lisa-lorhum","bio/lisa-lorhum","Lisa Lorhum",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
22
|
+
"http://drupalconnect.com/team/jessica-marland","bio/jessica-marland","Jessica Marland",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
23
|
+
"http://drupalconnect.com/team/anne-easterling","bio/anne-easterling","Anne Easterling",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
24
|
+
"http://drupalconnect.com/team/heinze-wegener","bio/heinze-wegener","Heinze Wegener",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
25
|
+
"http://drupalconnect.com/team/yao-gbanaglo","bio/yao-gbanaglo","Yao Gbanaglo",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
26
|
+
"http://drupalconnect.com/team/joseph-cheek","bio/joe-cheek","Joe Cheek",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
27
|
+
"http://drupalconnect.com/team/steven-jackson","bio/steve-jackson","Steve Jackson",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
28
|
+
"http://drupalconnect.com/team/nina-samberg","bio/nina-samberg","Nina Samberg",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
29
|
+
"http://drupalconnect.com/team/christopher-spiker","bio/chris-spiker","Chris Spiker",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
30
|
+
"http://drupalconnect.com/team/christina-young","bio/christina-young","Christina Young",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
31
|
+
"http://drupalconnect.com/team/trent-wyman","bio/trent-wyman","Trent Wyman",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
32
|
+
"http://drupalconnect.com/team/karen-sironen","bio/karen-sironen","Karen Sironen",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
data/example/config.yml
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
-
local_source: '' #use the web
|
2
|
-
|
3
1
|
db_user: 'root'
|
4
2
|
db_pass: 'root'
|
5
3
|
db: 'flatfish_sample'
|
6
4
|
|
7
5
|
# NOTE: these map to Drupal content types and AR database tables
|
8
6
|
types:
|
7
|
+
Bio:
|
8
|
+
csv: '/home/tloudon/workspace/flatfish/example/bio.csv'
|
9
|
+
host: 'http://drupalconnect.com'
|
9
10
|
Article:
|
10
11
|
csv: '/home/tloudon/workspace/flatfish/example/article.csv'
|
11
12
|
host: 'http://drupalconnect.com'
|
@@ -13,5 +14,9 @@ types:
|
|
13
14
|
csv: '/home/tloudon/workspace/flatfish/example/page.csv'
|
14
15
|
host: 'http://drupalconnect.com'
|
15
16
|
|
17
|
+
# use the web, otherwise path to local HTML root
|
18
|
+
local_source: ''
|
19
|
+
|
20
|
+
#
|
16
21
|
development:
|
17
22
|
max_rows: 1000
|
data/flatfish.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "flatfish"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = 'flatfish'
|
7
7
|
s.version = Flatfish::VERSION
|
8
|
-
s.date = '2012-
|
8
|
+
s.date = '2012-10-17'
|
9
9
|
s.summary = "Scrape web pages!"
|
10
10
|
s.description = "flatfish accepts a CSV of URLS with CSS selectors prepping them for insert into drupal"
|
11
11
|
s.authors = ["Tim Loudon", "Mike Crittenden"]
|
data/lib/flatfish.rb
CHANGED
data/lib/flatfish/page.rb
CHANGED
@@ -68,7 +68,7 @@ module Flatfish
|
|
68
68
|
@fields.each_with_index do |selectors, i|
|
69
69
|
next if -1 == selectors
|
70
70
|
html[@schema[i]] = ''
|
71
|
-
selectors.split('
|
71
|
+
selectors.split('&&').each do |selector|
|
72
72
|
update_hrefs(selector)
|
73
73
|
update_imgs(selector)
|
74
74
|
if @doc.css(selector).nil? then
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flatfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-10-17 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- TODO.md
|
94
94
|
- bin/flatfish
|
95
95
|
- example/article.csv
|
96
|
+
- example/bio.csv
|
96
97
|
- example/config.yml
|
97
98
|
- example/page.csv
|
98
99
|
- flatfish.gemspec
|