flatfish 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Rakefile +0 -12
- data/example/bio.csv +32 -0
- data/example/config.yml +7 -2
- data/flatfish.gemspec +1 -1
- data/lib/flatfish.rb +1 -1
- data/lib/flatfish/page.rb +1 -1
- metadata +3 -2
data/.gitignore
CHANGED
data/Rakefile
CHANGED
|
@@ -9,15 +9,3 @@ Rake::TestTask.new("test_units") do |t|
|
|
|
9
9
|
t.verbose = false
|
|
10
10
|
t.warning = true
|
|
11
11
|
end
|
|
12
|
-
|
|
13
|
-
RUBY='1.9.3'
|
|
14
|
-
|
|
15
|
-
desc "Build gem"
|
|
16
|
-
task :build_gem do
|
|
17
|
-
system "rvm #{RUBY} do gem build flatfish.gemspec"
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
desc "Install gem"
|
|
21
|
-
task :install_gem => :build_gem do
|
|
22
|
-
system "sudo rvm #{RUBY} do gem install flatfish-*.gem"
|
|
23
|
-
end
|
data/example/bio.csv
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"url","path","title","body","field_position"
|
|
2
|
+
"http://drupalconnect.com/team/john-florez","bio/john-florez","John Florez",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
3
|
+
"http://drupalconnect.com/team/jonathon-whitener","bio/jonathon-whitener","Jonathon Whitener",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
4
|
+
"http://drupalconnect.com/team/tim-loudon","bio/tim-loudon","Tim Loudon",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
5
|
+
"http://drupalconnect.com/team/mike-crittenden","bio/mike-crittenden","Mike Crittenden",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
6
|
+
"http://drupalconnect.com/team/christopher-jones","bio/christopher-jones","Christopher Jones",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
7
|
+
"http://drupalconnect.com/team/chris-akeley","bio/chris-akeley","Chris Akeley",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
8
|
+
"http://drupalconnect.com/team/kim-murphy","bio/kim-murphy","Kim Murphy",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
9
|
+
"http://drupalconnect.com/team/bill-minor","bio/bill-minor","Bill Minor",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
10
|
+
"http://drupalconnect.com/team/chris-boag","bio/chris-boag","Chris Boag",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
11
|
+
"http://drupalconnect.com/team/steve-edwards","bio/steve-edwards","Steve Edwards",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
12
|
+
"http://drupalconnect.com/team/roger-soper","bio/roger-soper","Roger Soper",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
13
|
+
"http://drupalconnect.com/team/chad-hester","bio/chad-hester","Chad Hester",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
14
|
+
"http://drupalconnect.com/team/heshan-wanigasooriya","bio/heshan-wanigasooriya","Heshan Wanigasooriya",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
15
|
+
"http://drupalconnect.com/team/edward-zwart","bio/ed-zwart","Ed Zwart",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
16
|
+
"http://drupalconnect.com/team/kathy-chavez","bio/kathy-chavez","Kathy Chavez",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
17
|
+
"http://drupalconnect.com/team/felipe-fidelix","bio/felipe-fidelix","Felipe Fidelix",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
18
|
+
"http://drupalconnect.com/team/jonas-flint","bio/jonas-flint","Jonas Flint",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
19
|
+
"http://drupalconnect.com/team/hally-turner","bio/hally-turner","Hally Turner",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
20
|
+
"http://drupalconnect.com/team/katelyn-cushman","bio/kate-cushman","Kate Cushman",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
21
|
+
"http://drupalconnect.com/team/lisa-lorhum","bio/lisa-lorhum","Lisa Lorhum",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
22
|
+
"http://drupalconnect.com/team/jessica-marland","bio/jessica-marland","Jessica Marland",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
23
|
+
"http://drupalconnect.com/team/anne-easterling","bio/anne-easterling","Anne Easterling",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
24
|
+
"http://drupalconnect.com/team/heinze-wegener","bio/heinze-wegener","Heinze Wegener",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
25
|
+
"http://drupalconnect.com/team/yao-gbanaglo","bio/yao-gbanaglo","Yao Gbanaglo",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
26
|
+
"http://drupalconnect.com/team/joseph-cheek","bio/joe-cheek","Joe Cheek",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
27
|
+
"http://drupalconnect.com/team/steven-jackson","bio/steve-jackson","Steve Jackson",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
28
|
+
"http://drupalconnect.com/team/nina-samberg","bio/nina-samberg","Nina Samberg",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
29
|
+
"http://drupalconnect.com/team/christopher-spiker","bio/chris-spiker","Chris Spiker",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
30
|
+
"http://drupalconnect.com/team/christina-young","bio/christina-young","Christina Young",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
31
|
+
"http://drupalconnect.com/team/trent-wyman","bio/trent-wyman","Trent Wyman",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
|
32
|
+
"http://drupalconnect.com/team/karen-sironen","bio/karen-sironen","Karen Sironen",".field-name-field-user-bio",".field-name-field-user-position .field-item"
|
data/example/config.yml
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
local_source: '' #use the web
|
|
2
|
-
|
|
3
1
|
db_user: 'root'
|
|
4
2
|
db_pass: 'root'
|
|
5
3
|
db: 'flatfish_sample'
|
|
6
4
|
|
|
7
5
|
# NOTE: these map to Drupal content types and AR database tables
|
|
8
6
|
types:
|
|
7
|
+
Bio:
|
|
8
|
+
csv: '/home/tloudon/workspace/flatfish/example/bio.csv'
|
|
9
|
+
host: 'http://drupalconnect.com'
|
|
9
10
|
Article:
|
|
10
11
|
csv: '/home/tloudon/workspace/flatfish/example/article.csv'
|
|
11
12
|
host: 'http://drupalconnect.com'
|
|
@@ -13,5 +14,9 @@ types:
|
|
|
13
14
|
csv: '/home/tloudon/workspace/flatfish/example/page.csv'
|
|
14
15
|
host: 'http://drupalconnect.com'
|
|
15
16
|
|
|
17
|
+
# use the web, otherwise path to local HTML root
|
|
18
|
+
local_source: ''
|
|
19
|
+
|
|
20
|
+
#
|
|
16
21
|
development:
|
|
17
22
|
max_rows: 1000
|
data/flatfish.gemspec
CHANGED
|
@@ -5,7 +5,7 @@ require "flatfish"
|
|
|
5
5
|
Gem::Specification.new do |s|
|
|
6
6
|
s.name = 'flatfish'
|
|
7
7
|
s.version = Flatfish::VERSION
|
|
8
|
-
s.date = '2012-
|
|
8
|
+
s.date = '2012-10-17'
|
|
9
9
|
s.summary = "Scrape web pages!"
|
|
10
10
|
s.description = "flatfish accepts a CSV of URLS with CSS selectors prepping them for insert into drupal"
|
|
11
11
|
s.authors = ["Tim Loudon", "Mike Crittenden"]
|
data/lib/flatfish.rb
CHANGED
data/lib/flatfish/page.rb
CHANGED
|
@@ -68,7 +68,7 @@ module Flatfish
|
|
|
68
68
|
@fields.each_with_index do |selectors, i|
|
|
69
69
|
next if -1 == selectors
|
|
70
70
|
html[@schema[i]] = ''
|
|
71
|
-
selectors.split('
|
|
71
|
+
selectors.split('&&').each do |selector|
|
|
72
72
|
update_hrefs(selector)
|
|
73
73
|
update_imgs(selector)
|
|
74
74
|
if @doc.css(selector).nil? then
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: flatfish
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.3
|
|
4
|
+
version: 0.3.4
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2012-
|
|
13
|
+
date: 2012-10-17 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: nokogiri
|
|
@@ -93,6 +93,7 @@ files:
|
|
|
93
93
|
- TODO.md
|
|
94
94
|
- bin/flatfish
|
|
95
95
|
- example/article.csv
|
|
96
|
+
- example/bio.csv
|
|
96
97
|
- example/config.yml
|
|
97
98
|
- example/page.csv
|
|
98
99
|
- flatfish.gemspec
|