flatfish 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -4
- data/flatfish.gemspec +6 -1
- data/lib/flatfish.rb +1 -1
- metadata +67 -3
data/README.md
CHANGED
@@ -6,11 +6,12 @@ Flatfish is a lib to scrape HTML based on a CSV with CSS selectors and configura
|
|
6
6
|
The ultimate goal of Flatfish is to prep and load the HTML into Drupal.
|
7
7
|
|
8
8
|
## INSTALLATION
|
9
|
-
Flatfish is
|
9
|
+
Flatfish is on Rubygems, so you can just `gem install flatfish`. But if you are starting from scratch:
|
10
10
|
|
11
|
-
1. We're using Ruby 1.9.3, so install that with RVM, rbenv+ruby-build, or on your own.
|
12
|
-
2.
|
13
|
-
3.
|
11
|
+
1. We're using Ruby 1.9.3, so install that with RVM, rbenv+ruby-build, or on your own. Note that there are some soft dependencies in Ruby that you should also install; search online or see http://goo.gl/YSvmp.
|
12
|
+
2. At the moment, Flatfish is set up to use MySQL, so you will need to install the mysql2 gem dependencies, notably libmysqlclient-dev.
|
13
|
+
3. Flatfish uses Nokogiri to parse the HTML, so you will also need to install its dependencies, libxslt-dev and libxml2-dev.
|
14
|
+
4. From here you should be able to `gem install flatfish` without any issues.
|
14
15
|
|
15
16
|
## NOTES
|
16
17
|
As Flatfish scrapes the HTML over-the-wire, it can be a bit slow (say 10 minutes for 500 pages), but you can speed things up by pointing to a local copy of your site by entering a value for `local_source` in the config.yml file (see the example directory).
|
data/flatfish.gemspec
CHANGED
@@ -5,13 +5,18 @@ require "flatfish"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = 'flatfish'
|
7
7
|
s.version = Flatfish::VERSION
|
8
|
-
s.date = '
|
8
|
+
s.date = '2012-08-04'
|
9
9
|
s.summary = "Scrape web pages!"
|
10
10
|
s.description = "flatfish accepts a CSV of URLS with CSS selectors prepping them for insert into drupal"
|
11
11
|
s.authors = ["Tim Loudon", "Mike Crittenden"]
|
12
12
|
s.email = 'timl@drupalconnect.com'
|
13
13
|
s.homepage = 'https://github.com/drupalstaffing/flatfish'
|
14
14
|
|
15
|
+
s.add_dependency 'nokogiri'
|
16
|
+
s.add_dependency 'activerecord'
|
17
|
+
s.add_dependency 'mysql2'
|
18
|
+
s.add_dependency 'awesome_print'
|
19
|
+
|
15
20
|
s.files = `git ls-files`.split("\n")
|
16
21
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
22
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
data/lib/flatfish.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flatfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,8 +10,72 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
14
|
-
dependencies:
|
13
|
+
date: 2012-08-04 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '0'
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: activerecord
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
type: :runtime
|
40
|
+
prerelease: false
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: mysql2
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: awesome_print
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
type: :runtime
|
72
|
+
prerelease: false
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
15
79
|
description: flatfish accepts a CSV of URLS with CSS selectors prepping them for insert
|
16
80
|
into drupal
|
17
81
|
email: timl@drupalconnect.com
|