twitter_to_csv 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +1 -1
- data/lib/twitter_to_csv/csv_builder.rb +17 -8
- data/lib/twitter_to_csv/version.rb +1 -1
- metadata +12 -14
data/README.markdown
CHANGED
@@ -37,7 +37,7 @@ You can extract URLs from the tweet into their own columns by including `--url-c
|
|
37
37
|
|
38
38
|
## Field names
|
39
39
|
|
40
|
-
Use `--sample-fields 1000
|
40
|
+
Use `--sample-fields 1000` to output the occurrence count of different Twitter fields, like so:
|
41
41
|
|
42
42
|
twitter_to_csv --username <your twitter username> --password <your twitter password> --sample-fields 1000
|
43
43
|
|
@@ -14,10 +14,10 @@ module TwitterToCsv
|
|
14
14
|
@num_samples = 0
|
15
15
|
end
|
16
16
|
|
17
|
-
def run
|
17
|
+
def run(&block)
|
18
18
|
log_csv_header if options[:csv] && !options[:csv_appending]
|
19
19
|
if options[:replay_from_file]
|
20
|
-
replay_from options[:replay_from_file]
|
20
|
+
replay_from options[:replay_from_file], &block
|
21
21
|
else
|
22
22
|
begin
|
23
23
|
TwitterWatcher.new(options).run do |status|
|
@@ -35,10 +35,11 @@ module TwitterToCsv
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
def handle_status(status)
|
38
|
+
def handle_status(status, &block)
|
39
39
|
if (options[:require_english] && is_english?(status)) || !options[:require_english]
|
40
40
|
log_json(status) if options[:json]
|
41
41
|
log_csv(status) if options[:csv]
|
42
|
+
yield_status(status, &block) if block
|
42
43
|
sample_fields(status) if options[:sample_fields]
|
43
44
|
STDERR.puts "Logging: #{status['text']}" if options[:verbose]
|
44
45
|
end
|
@@ -55,7 +56,15 @@ module TwitterToCsv
|
|
55
56
|
end
|
56
57
|
|
57
58
|
def log_csv(status)
|
58
|
-
|
59
|
+
options[:csv].puts output_row(status).to_csv(:encoding => 'UTF-8', :force_quotes => true)
|
60
|
+
end
|
61
|
+
|
62
|
+
def yield_status(status, &block)
|
63
|
+
block.call output_row(status)
|
64
|
+
end
|
65
|
+
|
66
|
+
def output_row(status)
|
67
|
+
row = options[:fields].map do |field|
|
59
68
|
field.split(".").inject(status) { |memo, segment|
|
60
69
|
memo && memo[segment]
|
61
70
|
}.to_s
|
@@ -63,18 +72,18 @@ module TwitterToCsv
|
|
63
72
|
|
64
73
|
if options[:url_columns] && options[:url_columns] > 0
|
65
74
|
urls = status['text'].scan(URL_REGEX).flatten.compact
|
66
|
-
options[:url_columns].times { |i|
|
75
|
+
options[:url_columns].times { |i| row << urls[i].to_s }
|
67
76
|
end
|
68
77
|
|
69
|
-
|
78
|
+
row
|
70
79
|
end
|
71
80
|
|
72
|
-
def replay_from(filename)
|
81
|
+
def replay_from(filename, &block)
|
73
82
|
File.open(filename, "r") do |file|
|
74
83
|
until file.eof?
|
75
84
|
line = file.readline
|
76
85
|
next if line =~ /\A------SEP.RATOR------\Z/i
|
77
|
-
handle_status JSON.parse(line)
|
86
|
+
handle_status JSON.parse(line), &block
|
78
87
|
end
|
79
88
|
end
|
80
89
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_to_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-31 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &81656020 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81656020
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: twitter-stream
|
27
|
-
requirement: &
|
27
|
+
requirement: &81655810 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81655810
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &81655600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *81655600
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: unsupervised-language-detection
|
49
|
-
requirement: &
|
49
|
+
requirement: &81655390 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *81655390
|
58
58
|
description: ''
|
59
59
|
email:
|
60
60
|
- andrew@iterationlabs.com
|
@@ -97,10 +97,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
97
|
version: '0'
|
98
98
|
requirements: []
|
99
99
|
rubyforge_project: twitter_to_csv
|
100
|
-
rubygems_version: 1.8.
|
100
|
+
rubygems_version: 1.8.16
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
103
|
summary: Dump the Twitter streaming API to a CSV or JSON file
|
104
|
-
test_files:
|
105
|
-
- spec/csv_builder_spec.rb
|
106
|
-
- spec/spec_helper.rb
|
104
|
+
test_files: []
|