twitter_to_csv 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +1 -1
- data/lib/twitter_to_csv/csv_builder.rb +17 -8
- data/lib/twitter_to_csv/version.rb +1 -1
- metadata +12 -14
data/README.markdown
CHANGED
@@ -37,7 +37,7 @@ You can extract URLs from the tweet into their own columns by including `--url-c
|
|
37
37
|
|
38
38
|
## Field names
|
39
39
|
|
40
|
-
Use `--sample-fields 1000
|
40
|
+
Use `--sample-fields 1000` to output the occurrence count of different Twitter fields, like so:
|
41
41
|
|
42
42
|
twitter_to_csv --username <your twitter username> --password <your twitter password> --sample-fields 1000
|
43
43
|
|
data/lib/twitter_to_csv/csv_builder.rb
CHANGED
@@ -14,10 +14,10 @@ module TwitterToCsv
|
|
14
14
|
@num_samples = 0
|
15
15
|
end
|
16
16
|
|
17
|
-
def run
|
17
|
+
def run(&block)
|
18
18
|
log_csv_header if options[:csv] && !options[:csv_appending]
|
19
19
|
if options[:replay_from_file]
|
20
|
-
replay_from options[:replay_from_file]
|
20
|
+
replay_from options[:replay_from_file], &block
|
21
21
|
else
|
22
22
|
begin
|
23
23
|
TwitterWatcher.new(options).run do |status|
|
@@ -35,10 +35,11 @@ module TwitterToCsv
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
def handle_status(status)
|
38
|
+
def handle_status(status, &block)
|
39
39
|
if (options[:require_english] && is_english?(status)) || !options[:require_english]
|
40
40
|
log_json(status) if options[:json]
|
41
41
|
log_csv(status) if options[:csv]
|
42
|
+
yield_status(status, &block) if block
|
42
43
|
sample_fields(status) if options[:sample_fields]
|
43
44
|
STDERR.puts "Logging: #{status['text']}" if options[:verbose]
|
44
45
|
end
|
@@ -55,7 +56,15 @@ module TwitterToCsv
|
|
55
56
|
end
|
56
57
|
|
57
58
|
def log_csv(status)
|
58
|
-
|
59
|
+
options[:csv].puts output_row(status).to_csv(:encoding => 'UTF-8', :force_quotes => true)
|
60
|
+
end
|
61
|
+
|
62
|
+
def yield_status(status, &block)
|
63
|
+
block.call output_row(status)
|
64
|
+
end
|
65
|
+
|
66
|
+
def output_row(status)
|
67
|
+
row = options[:fields].map do |field|
|
59
68
|
field.split(".").inject(status) { |memo, segment|
|
60
69
|
memo && memo[segment]
|
61
70
|
}.to_s
|
@@ -63,18 +72,18 @@ module TwitterToCsv
|
|
63
72
|
|
64
73
|
if options[:url_columns] && options[:url_columns] > 0
|
65
74
|
urls = status['text'].scan(URL_REGEX).flatten.compact
|
66
|
-
options[:url_columns].times { |i|
|
75
|
+
options[:url_columns].times { |i| row << urls[i].to_s }
|
67
76
|
end
|
68
77
|
|
69
|
-
|
78
|
+
row
|
70
79
|
end
|
71
80
|
|
72
|
-
def replay_from(filename)
|
81
|
+
def replay_from(filename, &block)
|
73
82
|
File.open(filename, "r") do |file|
|
74
83
|
until file.eof?
|
75
84
|
line = file.readline
|
76
85
|
next if line =~ /\A------SEP.RATOR------\Z/i
|
77
|
-
handle_status JSON.parse(line)
|
86
|
+
handle_status JSON.parse(line), &block
|
78
87
|
end
|
79
88
|
end
|
80
89
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_to_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-31 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &81656020 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81656020
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: twitter-stream
|
27
|
-
requirement: &
|
27
|
+
requirement: &81655810 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81655810
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &81655600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *81655600
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: unsupervised-language-detection
|
49
|
-
requirement: &
|
49
|
+
requirement: &81655390 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *81655390
|
58
58
|
description: ''
|
59
59
|
email:
|
60
60
|
- andrew@iterationlabs.com
|
@@ -97,10 +97,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
97
|
version: '0'
|
98
98
|
requirements: []
|
99
99
|
rubyforge_project: twitter_to_csv
|
100
|
-
rubygems_version: 1.8.
|
100
|
+
rubygems_version: 1.8.16
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
103
|
summary: Dump the Twitter streaming API to a CSV or JSON file
|
104
|
-
test_files:
|
105
|
-
- spec/csv_builder_spec.rb
|
106
|
-
- spec/spec_helper.rb
|
104
|
+
test_files: []
|