csvget 0.0.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/CHANGELOG +8 -0
- data/LICENSE +20 -0
- data/README.rdoc +93 -0
- data/Rakefile +62 -0
- data/VERSION +1 -0
- data/bin/csvget +34 -0
- data/bin/jsonget +28 -0
- data/csvget.gemspec +65 -0
- data/hn.let +10 -0
- data/lib/csvget.rb +54 -0
- data/lib/jsonget.rb +29 -0
- data/test/csvget_test.rb +20 -0
- metadata +71 -18
data/.gitignore
ADDED
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Kyle Maxwell
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
= csvget (also, jsonget)
|
2
|
+
|
3
|
+
== What's new in 0.3.0
|
4
|
+
|
5
|
+
1. Added command-line option:
|
6
|
+
|
7
|
+
--filter=RUBY_CODE RUBY_CODE will be eval'd in context of @row.is_a?(FasterCSV::Row)
|
8
|
+
|
9
|
+
# Example:
|
10
|
+
./bin/csvget --require=chronic --require=time --filter "@row['time']=Chronic.parse(@row['time']).iso8601" # ...
|
11
|
+
|
12
|
+
2. Added CHANGELOG
|
13
|
+
|
14
|
+
== Dependencies
|
15
|
+
|
16
|
+
- http://github.com/fizx/parsley/tree/master and its dependencies.
|
17
|
+
- Rubygems
|
18
|
+
|
19
|
+
== Running on EC2
|
20
|
+
|
21
|
+
> git clone git://github.com/fizx/csvget-ec2-recipe.git
|
22
|
+
> cd csvget-ec2-recipe
|
23
|
+
> ./boot.rb
|
24
|
+
> ssh YOUR_INSTANCE
|
25
|
+
|
26
|
+
== Local Installation
|
27
|
+
|
28
|
+
1. Install the dependencies.
|
29
|
+
2. > gem sources -a http://gems.github.com
|
30
|
+
3. > sudo gem install fizx-csvget
|
31
|
+
|
32
|
+
== Example Usage
|
33
|
+
|
34
|
+
> cat hn.let
|
35
|
+
{
|
36
|
+
"headlines":[{
|
37
|
+
"title": ".title a",
|
38
|
+
"link": ".title a @href",
|
39
|
+
"comments": "match(.subtext a:nth-child(3), '\\d+')",
|
40
|
+
"user": ".subtext a:nth-child(2)",
|
41
|
+
"score": "match(.subtext span, '\\d+')",
|
42
|
+
"time": "match(.subtext, '\\d+\\s+\\w+\\s+ago')"
|
43
|
+
}]
|
44
|
+
}
|
45
|
+
> csvget --directory-prefix=./data -A "/x" -w 5 --parselet=hn.let http://news.ycombinator.com/
|
46
|
+
> head data/headlines.csv
|
47
|
+
comments,title,time,link,score,user
|
48
|
+
4,Simpson's paradox: why mistrust seemingly simple statistics,2 hours ago,http://en.wikipedia.org/wiki/Simpson%27s_paradox,41,waldrews
|
49
|
+
67,America's unjust sex laws,2 hours ago,http://www.economist.com/opinion/displaystory.cfm?story_id=14165460,59,MikeCapone
|
50
|
+
23,Buy somebody lunch,3 hours ago,http://www.whattofix.com/blog/archives/2009/08/buy-somebody-lu.php,58,DanielBMarkham
|
51
|
+
10,A design pattern is an artifact of a missing feature in your chosen language,3 hours ago,http://www.snell-pym.org.uk/archives/2008/12/29/design-patterns/,31,bensummers
|
52
|
+
4,API changes in Snow Leopard,1 hour ago,http://developer.apple.com/mac/library/releasenotes/MacOSX/WhatsNewInOSX/Articles/MacOSX10_6.html#//apple_ref/doc/uid/TP40008898-SW1,14,pieter
|
53
|
+
16,How to run a linux based home web server,3 hours ago,http://stevehanov.ca/blog/index.php?id=73,28,RiderOfGiraffes
|
54
|
+
1,"OpenCL ""Hello World""",1 hour ago,"http://developer.apple.com/mac/library/documentation/Performance/Conceptual/OpenCL_MacProgGuide/Example:Hello,World/Example:Hello,World.html",8,pieter
|
55
|
+
15,US Senate bill allows White House to disconnect private computers from Internet,4 hours ago,http://news.cnet.com/8301-13578_3-10320096-38.html,35,drewr
|
56
|
+
1,Strategy: Solve Only 80 Percent of the Problem,47 minutes ago,http://highscalability.com/strategy-solve-only-80-percent-problem,6,alrex021
|
57
|
+
> csvget -h
|
58
|
+
Usage: ./bin/csvget [options] SEED_URL [SEED_URL2 ...]
|
59
|
+
--parselet=JSON_FILE JSON_FILE is a parselet.
|
60
|
+
-w, --wait=SECONDS wait SECONDS between retrievals.
|
61
|
+
-P, --directory-prefix=PREFIX save files to PREFIX/...
|
62
|
+
-U, --user-agent=AGENT identify as AGENT instead of RWget/VERSION.
|
63
|
+
-A, --accept-pattern=RUBY_REGEX URLs must match RUBY_REGEX to be saved to the queue.
|
64
|
+
--time-limit=AMOUNT Crawler will stop after this AMOUNT of time has passed.
|
65
|
+
-R, --reject-pattern=RUBY_REGEX URLs must NOT match RUBY_REGEX to be saved to the queue.
|
66
|
+
--require=RUBY_SCRIPT Will execute 'require RUBY_SCRIPT'
|
67
|
+
--limit-rate=RATE limit download rate to RATE.
|
68
|
+
--http-proxy=URL Proxies via URL
|
69
|
+
--proxy-user=USER Sets proxy user to USER
|
70
|
+
--proxy-password=PASSWORD Sets proxy password to PASSWORD
|
71
|
+
--fetch-class=RUBY_CLASS Must implement fetch(uri, user_agent_string) #=> [final_redirected_url, file_object]
|
72
|
+
--store-class=RUBY_CLASS Must implement put(key_string, temp_file)
|
73
|
+
--dupes-class=RUBY_CLASS Must implement dupe?(uri)
|
74
|
+
--queue-class=RUBY_CLASS Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int]
|
75
|
+
--links-class=RUBY_CLASS Must implement urls(base_uri, temp_file) #=> [uri, ...]
|
76
|
+
-S, --sitemap=URL URL of a sitemap to crawl (will ignore inter-page links)
|
77
|
+
-V, --version
|
78
|
+
-Q, --quota=NUMBER set retrieval quota to NUMBER.
|
79
|
+
--max-redirect=NUM maximum redirections allowed per page.
|
80
|
+
-H, --span-hosts go to foreign hosts when recursive
|
81
|
+
--connect-timeout=SECS set the connect timeout to SECS.
|
82
|
+
-T, --timeout=SECS set all timeout values to SECONDS.
|
83
|
+
-l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).
|
84
|
+
--[no-]timestampize Prepend the timestamp of when the crawl started to the directory structure.
|
85
|
+
--incremental-from=PREVIOUS Build upon the indexing already saved in PREVIOUS.
|
86
|
+
--protocol-directories use protocol name in directories.
|
87
|
+
--no-host-directories don't create host directories.
|
88
|
+
-v, --[no-]verbose Run verbosely
|
89
|
+
-h, --help Show this message
|
90
|
+
|
91
|
+
== Copyright
|
92
|
+
|
93
|
+
Copyright (c) 2009 Kyle Maxwell. See LICENSE for details (MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: when it cannot be loaded we print a
# hint and carry on so the test and rdoc tasks below remain usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "csvget"
    gem.executables = ["csvget", "jsonget"]
    gem.summary = %Q{Uses parselets and rwget to generate csv files from websites}
    gem.description = %Q{Super easy to use (but lots of dependencies :/) parser}
    gem.email = "kyle@kylemaxwell.com"
    gem.homepage = "http://github.com/fizx/csvget"
    gem.authors = ["Kyle Maxwell"]
    gem.add_dependency("fizx-rwget", ["> 0.2.3"])
    gem.add_dependency("fizx-parsley-ruby", ["> 0.0.0"])
    gem.add_dependency("activesupport", ["> 0.0.0"])
    gem.add_dependency("fastercsv", [">= 1.4.0"])

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage. When rcov is missing, define a stub task that explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only defined when the Jeweler tasks above loaded.
# Depending on it unconditionally makes `rake test` fail with an unknown-task
# error in exactly the situation the LoadError rescue is meant to tolerate.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Prefer the rdoc-provided task and fall back to the
# legacy rake/rdoctask location when rdoc/task cannot be loaded.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "csvget #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.0
|
data/bin/csvget
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end that crawls the seed URLs with rwget and stores the
# parselet output through CSVStore (unless another store class is requested).
require "rubygems"
require File.dirname(__FILE__) + "/../lib/csvget"
require "rwget"

# Both extra options are repeatable; each occurrence is appended to a list.
parser = RWGetOptionParser.new do |opts|
  opts.on("--parselet=JSON_FILE", "JSON_FILE is a parselet.") do |path|
    (parser.options[:parselets] ||= []) << path
  end

  opts.on("--filter=RUBY_CODE", "RUBY_CODE will be eval'd in context of @row.is_a?(FasterCSV::Row)") do |filter|
    (parser.options[:filter] ||= []) << filter
  end
end
parser.parse!

# Without at least one seed URL there is nothing to crawl: show usage and bail out.
seeds = parser.options[:seeds]
if seeds.empty?
  puts parser.usage
  puts " -h for options listing"
  exit(1)
end

parser.options[:store_class] ||= "CSVStore"

crawler = RWGet::Controller.new(parser.options)
begin
  crawler.start
ensure
  # Runs even when the crawl is interrupted, so the controller is always closed.
  STDERR.puts "Closing..."
  crawler.close
end
|
data/bin/jsonget
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end that crawls the seed URLs with rwget and stores the
# parselet output through JSONStore (unless another store class is requested).
require "rubygems"
require File.dirname(__FILE__) + "/../lib/jsonget"
require "rwget"

# --parselet is repeatable; each occurrence is appended to the list.
parser = RWGetOptionParser.new do |opts|
  opts.on("--parselet=JSON_FILE", "JSON_FILE is a parselet.") do |path|
    (parser.options[:parselets] ||= []) << path
  end
end
parser.parse!

# Without at least one seed URL there is nothing to crawl: show usage and bail out.
seeds = parser.options[:seeds]
if seeds.empty?
  puts parser.usage
  puts " -h for options listing"
  exit(1)
end

parser.options[:store_class] ||= "JSONStore"

crawler = RWGet::Controller.new(parser.options)
begin
  crawler.start
ensure
  # Runs even when the crawl is interrupted, so the controller is always closed.
  STDERR.puts "Closing..."
  crawler.close
end
|
data/csvget.gemspec
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{csvget}
|
8
|
+
s.version = "0.4.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Kyle Maxwell"]
|
12
|
+
s.date = %q{2009-10-16}
|
13
|
+
s.description = %q{Super easy to use (but lots of dependencies :/) parser}
|
14
|
+
s.email = %q{kyle@kylemaxwell.com}
|
15
|
+
s.executables = ["csvget", "jsonget"]
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"LICENSE",
|
18
|
+
"README.rdoc"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
".gitignore",
|
22
|
+
"CHANGELOG",
|
23
|
+
"LICENSE",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"bin/csvget",
|
28
|
+
"bin/jsonget",
|
29
|
+
"csvget.gemspec",
|
30
|
+
"hn.let",
|
31
|
+
"lib/csvget.rb",
|
32
|
+
"lib/jsonget.rb",
|
33
|
+
"test/csvget_test.rb"
|
34
|
+
]
|
35
|
+
s.homepage = %q{http://github.com/fizx/csvget}
|
36
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = %q{1.3.4}
|
39
|
+
s.summary = %q{Uses parselets and rwget to generate csv files from websites}
|
40
|
+
s.test_files = [
|
41
|
+
"test/csvget_test.rb"
|
42
|
+
]
|
43
|
+
|
44
|
+
if s.respond_to? :specification_version then
|
45
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<fizx-rwget>, ["> 0.2.3"])
|
50
|
+
s.add_runtime_dependency(%q<fizx-parsley-ruby>, ["> 0.0.0"])
|
51
|
+
s.add_runtime_dependency(%q<activesupport>, ["> 0.0.0"])
|
52
|
+
s.add_runtime_dependency(%q<fastercsv>, [">= 1.4.0"])
|
53
|
+
else
|
54
|
+
s.add_dependency(%q<fizx-rwget>, ["> 0.2.3"])
|
55
|
+
s.add_dependency(%q<fizx-parsley-ruby>, ["> 0.0.0"])
|
56
|
+
s.add_dependency(%q<activesupport>, ["> 0.0.0"])
|
57
|
+
s.add_dependency(%q<fastercsv>, [">= 1.4.0"])
|
58
|
+
end
|
59
|
+
else
|
60
|
+
s.add_dependency(%q<fizx-rwget>, ["> 0.2.3"])
|
61
|
+
s.add_dependency(%q<fizx-parsley-ruby>, ["> 0.0.0"])
|
62
|
+
s.add_dependency(%q<activesupport>, ["> 0.0.0"])
|
63
|
+
s.add_dependency(%q<fastercsv>, [">= 1.4.0"])
|
64
|
+
end
|
65
|
+
end
|
data/hn.let
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
{
|
2
|
+
"headlines":[{
|
3
|
+
"title": ".title a",
|
4
|
+
"link": ".title a @href",
|
5
|
+
"comments": "match(.subtext a:nth-child(3), '\\d+')",
|
6
|
+
"user": ".subtext a:nth-child(2)",
|
7
|
+
"score": "match(.subtext span, '\\d+')",
|
8
|
+
"time": "match(.subtext, '\\d+\\s+\\w+\\s+ago')"
|
9
|
+
}]
|
10
|
+
}
|
data/lib/csvget.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "rwget"
|
3
|
+
require "parsley"
|
4
|
+
require "fastercsv"
|
5
|
+
require "activesupport"
|
6
|
+
require "fileutils"
|
7
|
+
|
8
|
+
# Store that runs every configured parselet over each fetched page and
# appends the extracted records to one CSV file per top-level parselet key
# (<prefix>/<key>.csv).
class CSVStore
  # options[:prefix]    - output directory (default "."); created if missing.
  # options[:filter]    - array of Ruby source strings eval'd against each row.
  # options[:parselets] - array of paths to parselet files.
  def initialize(options = {})
    @output_folder = options[:prefix] || "."
    @filters = options[:filter] || []
    FileUtils.mkdir_p(@output_folder)
    @parselets = (options[:parselets] || []).map { |path| Parsley.new(File.read(path)) }
    @files = {}   # key => open FasterCSV writer
    @headers = {} # key => column names, fixed by the first row seen for that key
  end

  # Parses +tmpfile+ (anything responding to #path) with every parselet and
  # records the results. +host+ is accepted but not used here. A ParsleyError
  # is reported on STDERR and does not stop the remaining parselets.
  def put(host, tmpfile)
    @parselets.each do |parselet|
      begin
        # Ask file(1) about the content; anything it does not label as XML is parsed as HTML.
        type = (`file "#{tmpfile.path}"` =~ /xml/i) ? :xml : :html
        output = parselet.parse(:file => tmpfile.path, :input => type)
        walk(output)
      rescue ParsleyError => e
        STDERR.puts "warning: #{e.message}"
      end
    end
  end

  # Appends the rows found under each top-level key of +data+ to that key's
  # CSV file. A value may be a single hash or an array of hashes. Keys whose
  # value is nil or an empty array are skipped: previously they raised
  # NoMethodError while deriving headers from a missing first row. The
  # +prefix+ argument is kept for interface compatibility; its incoming
  # value is not used.
  def walk(data, prefix = nil)
    data.each do |name, values|
      # Assign rather than shadow the parameter, so the eval'd filters below
      # still find the current key under the local name `prefix`.
      prefix = name
      rows = (values.is_a?(Array) ? values : [values]).compact
      next if rows.empty?

      file_name = File.join(@output_folder, "#{prefix}.csv")
      h = @headers[prefix] ||= rows.first.keys
      # The header line is written only when the file does not exist yet, so
      # appending to the output of an earlier run does not repeat it.
      f = @files[prefix] ||= FasterCSV.open(file_name, "a", :headers => h, :write_headers => !File.exist?(file_name))

      rows.each do |hash|
        @row = FasterCSV::Row.new(h, h.map { |key| hash[key] })
        # NOTE: filters are arbitrary Ruby taken from the options (the
        # command line's --filter); never feed this untrusted input.
        @filters.each { |filter| eval(filter) }
        f << @row
      end
    end
  end

  # Closes every CSV file opened by #walk.
  def close
    @files.each_value { |csv| csv.close }
  end
end
|
data/lib/jsonget.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "rwget"
|
3
|
+
require "parsley"
|
4
|
+
require "activesupport"
|
5
|
+
require "fileutils"
|
6
|
+
|
7
|
+
# Store that runs every configured parselet over each fetched page and
# appends the JSON result to one file per parselet
# (<prefix>/<parselet file name>.json).
class JSONStore
  # options[:prefix]    - output directory (default "."); created if missing.
  # options[:parselets] - array of paths to parselet files.
  def initialize(options = {})
    @output_folder = options[:prefix] || "."
    FileUtils.mkdir_p(@output_folder)
    paths = options[:parselets] || []
    @parselets = paths.map { |path| Parsley.new(File.read(path)) }
    # Open the outputs inside the requested folder; they used to be created
    # in the current directory even though the folder had just been made.
    @files = paths.map do |path|
      File.open(File.join(@output_folder, "#{File.basename(path)}.json"), "a")
    end
  end

  # Parses +tmpfile+ (anything responding to #path) with every parselet and
  # appends each JSON document, followed by a comma, as one line of the
  # matching output file. +host+ is accepted but not used here. A
  # ParsleyError is reported on STDERR and does not stop the remaining
  # parselets.
  def put(host, tmpfile)
    @parselets.zip(@files).each do |parselet, file|
      begin
        # Ask file(1) about the content; anything it does not label as XML is parsed as HTML.
        type = (`file "#{tmpfile.path}"` =~ /xml/i) ? :xml : :html
        output = parselet.parse(:file => tmpfile.path, :input => type, :output => :json) + ","
        # The result used to be computed and then dropped, leaving the files empty.
        file.puts(output)
      rescue ParsleyError => e
        STDERR.puts "warning: #{e.message}"
      end
    end
  end

  # Closes every output file.
  def close
    @files.each { |file| file.close }
  end
end
|
data/test/csvget_test.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require File.dirname(__FILE__) + "/../lib/csvget"
|
3
|
+
require "fileutils"
|
4
|
+
|
5
|
+
class CsvgetTest < Test::Unit::TestCase
|
6
|
+
include FileUtils
|
7
|
+
|
8
|
+
# def setup
|
9
|
+
# @output = {"bill-state"=>"Welcome to Google Business Solutions", "links"=>["/", "https://adwords.google.com/select/Login?sourceid=awo&subid=us-en-et-bizsol-0-biz1-all&medium=link&hl=en_US"]}
|
10
|
+
# @links = ParseletLinks.new(:parselets => File.dirname(__FILE__) + "/foo.let")
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# def test_bill_state
|
14
|
+
# bills = File.dirname(__FILE__) + "/bills.csv"
|
15
|
+
# @links.walk @output
|
16
|
+
# assert_equal File.read(File.dirname(__FILE__) + "/expected.csv"), File.read(bills)
|
17
|
+
# rm bills
|
18
|
+
# end
|
19
|
+
end
|
20
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kyle Maxwell
|
@@ -9,27 +9,80 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-10-
|
12
|
+
date: 2009-10-16 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: fizx-rwget
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.2.3
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: fizx-parsley-ruby
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.0.0
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: activesupport
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.0.0
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: fastercsv
|
47
|
+
type: :runtime
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.4.0
|
54
|
+
version:
|
55
|
+
description: Super easy to use (but lots of dependencies :/) parser
|
56
|
+
email: kyle@kylemaxwell.com
|
57
|
+
executables:
|
58
|
+
- csvget
|
59
|
+
- jsonget
|
20
60
|
extensions: []
|
21
61
|
|
22
|
-
extra_rdoc_files:
|
23
|
-
|
24
|
-
|
25
|
-
|
62
|
+
extra_rdoc_files:
|
63
|
+
- LICENSE
|
64
|
+
- README.rdoc
|
65
|
+
files:
|
66
|
+
- .gitignore
|
67
|
+
- CHANGELOG
|
68
|
+
- LICENSE
|
69
|
+
- README.rdoc
|
70
|
+
- Rakefile
|
71
|
+
- VERSION
|
72
|
+
- bin/csvget
|
73
|
+
- bin/jsonget
|
74
|
+
- csvget.gemspec
|
75
|
+
- hn.let
|
76
|
+
- lib/csvget.rb
|
77
|
+
- lib/jsonget.rb
|
78
|
+
- test/csvget_test.rb
|
26
79
|
has_rdoc: true
|
27
|
-
homepage:
|
80
|
+
homepage: http://github.com/fizx/csvget
|
28
81
|
licenses: []
|
29
82
|
|
30
83
|
post_install_message:
|
31
|
-
rdoc_options:
|
32
|
-
|
84
|
+
rdoc_options:
|
85
|
+
- --charset=UTF-8
|
33
86
|
require_paths:
|
34
87
|
- lib
|
35
88
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -50,6 +103,6 @@ rubyforge_project:
|
|
50
103
|
rubygems_version: 1.3.4
|
51
104
|
signing_key:
|
52
105
|
specification_version: 3
|
53
|
-
summary:
|
54
|
-
test_files:
|
55
|
-
|
106
|
+
summary: Uses parselets and rwget to generate csv files from websites
|
107
|
+
test_files:
|
108
|
+
- test/csvget_test.rb
|