twitter_ebooks 3.0.8 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/bin/ebooks +46 -0
- data/lib/twitter_ebooks/model.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f4312e16bb18ae646bbca7559d3044ecc7b00f7
|
4
|
+
data.tar.gz: 9fdcb9b3eb8cf9fc4449699ed20661a8b5ef8d2a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39aee9bc75ffe627b24d1f6ae29f7bfd1c3093e3295dfebf5fdafb67b8ed649f417811112c5086f50449bb0a21ee899e7e3a5896e76a0de32a524da548e05a39
|
7
|
+
data.tar.gz: 8136aec89545928bd5a04dd94fd14c078752a913f098a32a6ee96fd2c065f9d03ed14dcdc32550ce17d9f3815aaaee20153b2e24f617903e34502faa85e19a3b
|
data/README.md
CHANGED
@@ -93,7 +93,7 @@ The underlying streaming and REST clients from the [twitter gem](https://github.
|
|
93
93
|
|
94
94
|
## Archiving accounts
|
95
95
|
|
96
|
-
twitter\_ebooks comes with a syncing tool to download and then incrementally update a local json archive of a user's tweets
|
96
|
+
twitter\_ebooks comes with a syncing tool to download and then incrementally update a local json archive of a user's tweets (in this case, my good friend @0xabad1dea):
|
97
97
|
|
98
98
|
``` zsh
|
99
99
|
➜ ebooks archive 0xabad1dea corpus/0xabad1dea.json
|
data/bin/ebooks
CHANGED
@@ -114,6 +114,52 @@ STR
|
|
114
114
|
log "Corpuses consumed to #{outpath}"
|
115
115
|
end
|
116
116
|
|
117
|
+
HELP.jsonify = <<-STR
|
118
|
+
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
119
|
+
|
120
|
+
Takes a csv twitter archive and converts it to json.
|
121
|
+
STR
|
122
|
+
|
123
|
+
def self.jsonify(paths)
|
124
|
+
if paths.empty?
|
125
|
+
log usage
|
126
|
+
exit
|
127
|
+
end
|
128
|
+
|
129
|
+
paths.each do |path|
|
130
|
+
name = File.basename(path).split('.')[0]
|
131
|
+
new_path = name + ".json"
|
132
|
+
|
133
|
+
tweets = []
|
134
|
+
id = nil
|
135
|
+
if path.split('.')[-1] == "csv" #from twitter archive
|
136
|
+
csv_archive = CSV.read(path, :headers=>:first_row)
|
137
|
+
tweets = csv_archive.map do |tweet|
|
138
|
+
{ text: tweet['text'], id: tweet['tweet_id'] }
|
139
|
+
end
|
140
|
+
else
|
141
|
+
File.read(path).split("\n").each do |l|
|
142
|
+
if l.start_with?('# ')
|
143
|
+
id = l.split('# ')[-1]
|
144
|
+
else
|
145
|
+
tweet = { text: l }
|
146
|
+
if id
|
147
|
+
tweet[:id] = id
|
148
|
+
id = nil
|
149
|
+
end
|
150
|
+
tweets << tweet
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
File.open(new_path, 'w') do |f|
|
156
|
+
log "Writing #{tweets.length} tweets to #{new_path}"
|
157
|
+
f.write(JSON.pretty_generate(tweets))
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
|
117
163
|
HELP.gen = <<-STR
|
118
164
|
Usage: ebooks gen <model_path> [input]
|
119
165
|
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -118,7 +118,7 @@ module Ebooks
|
|
118
118
|
tweet[text_col]
|
119
119
|
end
|
120
120
|
else
|
121
|
-
log "Reading plaintext corpus from #{path}"
|
121
|
+
log "Reading plaintext corpus from #{path} (if this is a json or csv file, please rename the file with an extension and reconsume)"
|
122
122
|
lines = content.split("\n")
|
123
123
|
end
|
124
124
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.8
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|