flatfish 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -4
- data/flatfish.gemspec +6 -1
- data/lib/flatfish.rb +1 -1
- metadata +67 -3
data/README.md
CHANGED
@@ -6,11 +6,12 @@ Flatfish is a lib to scrape HTML based on a CSV with CSS selectors and configura
|
|
6
6
|
The ultimate goal of Flatfish is to prep and load the HTML into Drupal.
|
7
7
|
|
8
8
|
## INSTALLATION
|
9
|
-
Flatfish is
|
9
|
+
Flatfish is on Rubygems, so you can just `gem install flatfish`. But if you are starting from scratch:
|
10
10
|
|
11
|
-
1. We're using Ruby 1.9.3, so install that with RVM, rbenv+ruby-build, or on your own.
|
12
|
-
2.
|
13
|
-
3.
|
11
|
+
1. We're using Ruby 1.9.3, so install that with RVM, rbenv+ruby-build, or on your own. Note that Ruby has some soft dependencies you should also install; search for them or see http://goo.gl/YSvmp.
|
12
|
+
2. At the moment, Flatfish is set up to use MySQL, so you will need to install the mysql2 gem dependencies, notably libmysqlclient-dev.
|
13
|
+
3. Flatfish uses Nokogiri to parse the HTML, so you will also need to install its dependencies, libxslt-dev and libxml2-dev.
|
14
|
+
4. From here you should be able to `gem install flatfish` without any issues.
|
14
15
|
|
15
16
|
## NOTES
|
16
17
|
As Flatfish scrapes the HTML over-the-wire, it can be a bit slow (say 10 minutes for 500 pages), but you can speed things up by pointing to a local copy of your site by entering a value for `local_source` in the config.yml file (see the example directory).
|
data/flatfish.gemspec
CHANGED
@@ -5,13 +5,18 @@ require "flatfish"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = 'flatfish'
|
7
7
|
s.version = Flatfish::VERSION
|
8
|
-
s.date = '
|
8
|
+
s.date = '2012-08-04'
|
9
9
|
s.summary = "Scrape web pages!"
|
10
10
|
s.description = "flatfish accepts a CSV of URLS with CSS selectors prepping them for insert into drupal"
|
11
11
|
s.authors = ["Tim Loudon", "Mike Crittenden"]
|
12
12
|
s.email = 'timl@drupalconnect.com'
|
13
13
|
s.homepage = 'https://github.com/drupalstaffing/flatfish'
|
14
14
|
|
15
|
+
s.add_dependency 'nokogiri'
|
16
|
+
s.add_dependency 'activerecord'
|
17
|
+
s.add_dependency 'mysql2'
|
18
|
+
s.add_dependency 'awesome_print'
|
19
|
+
|
15
20
|
s.files = `git ls-files`.split("\n")
|
16
21
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
22
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
data/lib/flatfish.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flatfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,8 +10,72 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
14
|
-
dependencies:
|
13
|
+
date: 2012-08-04 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '0'
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: activerecord
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
type: :runtime
|
40
|
+
prerelease: false
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: mysql2
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: awesome_print
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
type: :runtime
|
72
|
+
prerelease: false
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
15
79
|
description: flatfish accepts a CSV of URLS with CSS selectors prepping them for insert
|
16
80
|
into drupal
|
17
81
|
email: timl@drupalconnect.com
|