birdwatcher 0.1.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/db/migrations/016_add_posted_at_to_urls.rb +14 -0
- data/lib/birdwatcher/commands/status.rb +2 -2
- data/lib/birdwatcher/concerns/persistence.rb +1 -1
- data/lib/birdwatcher/concerns/util.rb +19 -0
- data/lib/birdwatcher/modules/reporting/csv.rb +71 -0
- data/lib/birdwatcher/modules/reporting/json.rb +71 -0
- data/lib/birdwatcher/modules/urls/crawl.rb +2 -2
- data/lib/birdwatcher/util.rb +12 -0
- data/lib/birdwatcher/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9967717a3d089165d8c2184b7028cf965d747277
|
4
|
+
data.tar.gz: 098b63db5bf37e20bb3a5d72a515c01b1dad42e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6bd0fb95954c3493cb7b3adce85a68f2b86a439b87fb81477e00ba7f0fce3b12b6c2e7cfb38b9744c19d5345d1a7748c0aaa11e138c0028981337fd167918688
|
7
|
+
data.tar.gz: fe6e8192f569912ab7c3e3e6d7e51496fd408c48a11b713d883d387e155a2154a637414153da14ccf243af1f949dc4969dbfdc46c1175e968827710becf2f468
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Change Log
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
|
+
|
5
|
+
## [Unreleased]
|
6
|
+
|
7
|
+
## [0.3.1]
|
8
|
+
### Added
|
9
|
+
- New module `reporting/json` to export data from an SQL query to a file in JSON format
|
10
|
+
- New module `reporting/csv` to export data from an SQL query to a file in CSV format
|
11
|
+
- This changelog
|
12
|
+
|
13
|
+
### Changed
|
14
|
+
- `posted_at` column added to `urls` for better and easier ordering
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
- Make `status search` command case-insensitive
|
@@ -66,13 +66,13 @@ USAGE
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def search_statuses
|
69
|
-
search_term = arguments[1..-1].join(" ")
|
69
|
+
search_term = arguments[1..-1].join(" ").downcase
|
70
70
|
if search_term.empty?
|
71
71
|
error("You must provide a search term")
|
72
72
|
return false
|
73
73
|
end
|
74
74
|
statuses = current_workspace.statuses_dataset
|
75
|
-
.where("text LIKE ?", "%#{search_term}%")
|
75
|
+
.where("lower(text) LIKE ?", "%#{search_term}%")
|
76
76
|
.order(Sequel.desc(:posted_at))
|
77
77
|
.eager(:user)
|
78
78
|
.limit(1000)
|
@@ -61,7 +61,7 @@ module Birdwatcher
|
|
61
61
|
if status.urls?
|
62
62
|
status.urls.each do |url|
|
63
63
|
expanded_url = Birdwatcher::Util.strip_control_characters(url.expanded_url.to_s)
|
64
|
-
db_url = current_workspace.urls_dataset.first(:url => expanded_url) || current_workspace.add_url(:url => expanded_url)
|
64
|
+
db_url = current_workspace.urls_dataset.first(:url => expanded_url) || current_workspace.add_url(:url => expanded_url, :posted_at => status.created_at)
|
65
65
|
db_status.add_url(db_url)
|
66
66
|
end
|
67
67
|
end
|
@@ -114,6 +114,25 @@ module Birdwatcher
|
|
114
114
|
Birdwatcher::Util.excerpt(text, max_length, omission)
|
115
115
|
end
|
116
116
|
|
117
|
+
# Format a size as a compact, human readable string
#
# Convenience wrapper: the value is handed unchanged to
# +Birdwatcher::Util.number_to_human_size+ and that method's result is
# returned as-is.
#
# @param number [Numeric] size to format
#
# @example
#   number_to_human_size(1024)
#   #=> "1KB"
#
#   number_to_human_size(2097152)
#   #=> "2MB"
#
#   number_to_human_size(3650722201)
#   #=> "3.4GB"
#
# @return [String] human readable size
def number_to_human_size(number)
  Birdwatcher::Util.number_to_human_size(number)
end
|
135
|
+
|
117
136
|
# Suppress any potential output to STDOUT
|
118
137
|
#
|
119
138
|
# Used in cases where certain libraries or methods might output unwanted
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Birdwatcher
  module Modules
    module Reporting
      # Reporting module that executes the SQL given in the QUERY option and
      # writes the result set to the file named by the DEST option in CSV
      # format, optionally preceded by a header row (HEADERS option).
      class Csv < Birdwatcher::Module
        self.meta = {
          :name        => "CSV Exporter",
          :description => "Export result from SQL query to a CSV file",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "QUERY" => {
              :value       => nil,
              :description => "SQL query to execute",
              :required    => true
            },
            "HEADERS" => {
              :value       => true,
              :description => "Add CSV headers to the file",
              :required    => false,
              :boolean     => true
            }
          }
        }

        # Extended help text for the module.
        #
        # @return [String]
        def self.info
<<-INFO
The CSV exporter can write the results of an SQL query to a file in CSV format.

#{'IMPORTANT:'.bold} The module does not limit the data returned from the query
to the currently active workspace, the query will need to take that in to
consideration if necessary.
INFO
        end

        # Execute the query, render the rows as CSV and write them to DEST.
        #
        # Each step is wrapped in a +task+ call with a progress message. Query
        # failures and file system failures are both reported through +error+.
        #
        # @return [false, Object] +false+ when the query or the file write
        #   fails; otherwise the result of the final +info+ call
        def run
          destination = option_setting("DEST")
          # Assigned inside the task blocks below, so they must already exist
          # in the method scope to remain visible after each block finishes.
          rows    = nil
          headers = nil
          csv     = nil

          task("Executing SQL query...") do
            begin
              result  = database[option_setting("QUERY")]
              rows    = result.map { |r| r.to_hash.values }
              headers = result.columns.map(&:to_s)
            rescue Sequel::DatabaseError => e
              error("Syntax error: #{e.message}")
              # `return` inside the block leaves #run itself.
              return false
            end
          end

          task("Generating CSV...") do
            csv = CSV.generate(:write_headers => option_setting("HEADERS"), :headers => headers) do |doc|
              rows.each { |r| doc << r }
            end
          end

          task("Writing #{pluralize(rows.count, 'row', 'rows')} to file...") do
            begin
              File.open(destination, "w") { |f| f.write(csv) }
            rescue SystemCallError => e
              # DEST is user supplied: a missing directory or a permission
              # problem is reported the same way as a failing query instead of
              # escaping as a raw exception.
              error("Unable to write to #{destination}: #{e.message}")
              return false
            end
          end

          file_size = number_to_human_size(File.size(destination))
          info("Wrote #{file_size.bold} to #{destination.bold}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Birdwatcher
  module Modules
    module Reporting
      # Reporting module that executes the SQL given in the QUERY option and
      # writes the result set to the file named by the DEST option as a JSON
      # array of row objects, optionally pretty printed (PRETTY_FORMATTING
      # option).
      class Json < Birdwatcher::Module
        self.meta = {
          :name        => "JSON Exporter",
          :description => "Export result from SQL query to a JSON file",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "QUERY" => {
              :value       => nil,
              :description => "SQL query to execute",
              :required    => true
            },
            "PRETTY_FORMATTING" => {
              :value       => false,
              :description => "Output pretty formatted JSON",
              :required    => false,
              :boolean     => true
            }
          }
        }

        # Extended help text for the module.
        #
        # @return [String]
        def self.info
<<-INFO
The JSON exporter can write the results of an SQL query to a file in JSON format.

#{'IMPORTANT:'.bold} The module does not limit the data returned from the query
to the currently active workspace, the query will need to take that in to
consideration if necessary.
INFO
        end

        # Execute the query, serialize the rows to JSON and write them to DEST.
        #
        # Each step is wrapped in a +task+ call with a progress message. Query
        # failures and file system failures are both reported through +error+.
        #
        # @return [false, Object] +false+ when the query or the file write
        #   fails; otherwise the result of the final +info+ call
        def run
          destination = option_setting("DEST")
          # Assigned inside the task blocks below, so they must already exist
          # in the method scope to remain visible after each block finishes.
          rows = nil
          json = nil

          task("Executing SQL query...") do
            begin
              rows = database[option_setting("QUERY")].map { |r| r.to_hash }
            rescue Sequel::DatabaseError => e
              error("Syntax error: #{e.message}")
              # `return` inside the block leaves #run itself.
              return false
            end
          end

          task("Generating JSON...") do
            json = if option_setting("PRETTY_FORMATTING")
              JSON.pretty_generate(rows)
            else
              JSON.generate(rows)
            end
          end

          task("Writing #{pluralize(rows.count, 'row', 'rows')} to file...") do
            begin
              File.open(destination, "w") { |f| f.write(json) }
            rescue SystemCallError => e
              # DEST is user supplied: a missing directory or a permission
              # problem is reported the same way as a failing query instead of
              # escaping as a raw exception.
              error("Unable to write to #{destination}: #{e.message}")
              return false
            end
          end

          file_size = number_to_human_size(File.size(destination))
          info("Wrote #{file_size.bold} to #{destination.bold}")
        end
      end
    end
  end
end
|
@@ -80,11 +80,11 @@ INFO
|
|
80
80
|
if option_setting("RETRY_FAILED")
|
81
81
|
urls = current_workspace.urls_dataset
|
82
82
|
.where("crawled_at IS NULL or (crawled_at IS NOT NULL AND http_status IS NULL)")
|
83
|
-
.order(Sequel.desc(:
|
83
|
+
.order(Sequel.desc(:posted_at))
|
84
84
|
else
|
85
85
|
urls = current_workspace.urls_dataset
|
86
86
|
.where(:crawled_at => nil)
|
87
|
-
.order(Sequel.desc(:
|
87
|
+
.order(Sequel.desc(:posted_at))
|
88
88
|
end
|
89
89
|
if urls.empty?
|
90
90
|
error("There are currently no URLs in this workspace")
|
data/lib/birdwatcher/util.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Birdwatcher
|
2
2
|
module Util
|
3
|
+
HUMAN_PREFIXES = %w(TB GB MB KB B).freeze
|
4
|
+
|
3
5
|
def self.time_ago_in_words(time)
|
4
6
|
return "a very very long time ago" if time.year < 1800
|
5
7
|
secs = Time.now - time
|
@@ -61,6 +63,16 @@ module Birdwatcher
|
|
61
63
|
text[0..max_length] + omission
|
62
64
|
end
|
63
65
|
|
66
|
+
# Format a byte count as a short human readable size string.
#
# The value is divided by 1024 until its magnitude is no larger than 512
# or the largest unit in HUMAN_PREFIXES is reached. Magnitudes above 9 are
# rounded to a whole number, smaller ones to one decimal place, and a
# trailing ".0" is never printed.
#
# @param number [#to_f] size in bytes
#
# @example
#   number_to_human_size(1024)       #=> "1KB"
#   number_to_human_size(2097152)    #=> "2MB"
#   number_to_human_size(3650722201) #=> "3.4GB"
#   number_to_human_size(16282)      #=> "16KB"
#
# @return [String] the scaled value followed by its unit
def self.number_to_human_size(number)
  size = number.to_f
  unit = HUMAN_PREFIXES.length - 1 # start at the smallest unit ("B")

  # Compare the magnitude so negative values are scaled like positive ones.
  while size.abs > 512 && unit > 0
    unit -= 1
    size /= 1024
  end

  # Round rather than truncate ("%d" would turn 15.9 into 15).
  rounded = size.abs > 9 ? size.round : size.round(1)
  # Print whole values without a fractional part ("2MB", not "2.0MB").
  digits = rounded == rounded.to_i ? rounded.to_i.to_s : rounded.to_s

  digits + HUMAN_PREFIXES[unit]
end
|
75
|
+
|
64
76
|
def self.suppress_output(&block)
|
65
77
|
original_stdout = $stdout
|
66
78
|
$stdout = fake = StringIO.new
|
data/lib/birdwatcher/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: birdwatcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Henriksen
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -296,6 +296,7 @@ extra_rdoc_files: []
|
|
296
296
|
files:
|
297
297
|
- ".gitignore"
|
298
298
|
- ".travis.yml"
|
299
|
+
- CHANGELOG.md
|
299
300
|
- Gemfile
|
300
301
|
- LICENSE.txt
|
301
302
|
- README.md
|
@@ -319,6 +320,7 @@ files:
|
|
319
320
|
- db/migrations/013_create_influencers_users.rb
|
320
321
|
- db/migrations/014_create_influencees.rb
|
321
322
|
- db/migrations/015_create_influencees_users.rb
|
323
|
+
- db/migrations/016_add_posted_at_to_urls.rb
|
322
324
|
- exe/birdwatcher
|
323
325
|
- lib/birdwatcher.rb
|
324
326
|
- lib/birdwatcher/command.rb
|
@@ -353,6 +355,8 @@ files:
|
|
353
355
|
- lib/birdwatcher/klout_client.rb
|
354
356
|
- lib/birdwatcher/kml.rb
|
355
357
|
- lib/birdwatcher/module.rb
|
358
|
+
- lib/birdwatcher/modules/reporting/csv.rb
|
359
|
+
- lib/birdwatcher/modules/reporting/json.rb
|
356
360
|
- lib/birdwatcher/modules/statuses/kml.rb
|
357
361
|
- lib/birdwatcher/modules/statuses/sentiment.rb
|
358
362
|
- lib/birdwatcher/modules/statuses/word_cloud.rb
|