birdwatcher 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/db/migrations/016_add_posted_at_to_urls.rb +14 -0
- data/lib/birdwatcher/commands/status.rb +2 -2
- data/lib/birdwatcher/concerns/persistence.rb +1 -1
- data/lib/birdwatcher/concerns/util.rb +19 -0
- data/lib/birdwatcher/modules/reporting/csv.rb +71 -0
- data/lib/birdwatcher/modules/reporting/json.rb +71 -0
- data/lib/birdwatcher/modules/urls/crawl.rb +2 -2
- data/lib/birdwatcher/util.rb +12 -0
- data/lib/birdwatcher/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9967717a3d089165d8c2184b7028cf965d747277
|
4
|
+
data.tar.gz: 098b63db5bf37e20bb3a5d72a515c01b1dad42e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6bd0fb95954c3493cb7b3adce85a68f2b86a439b87fb81477e00ba7f0fce3b12b6c2e7cfb38b9744c19d5345d1a7748c0aaa11e138c0028981337fd167918688
|
7
|
+
data.tar.gz: fe6e8192f569912ab7c3e3e6d7e51496fd408c48a11b713d883d387e155a2154a637414153da14ccf243af1f949dc4969dbfdc46c1175e968827710becf2f468
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Change Log
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
|
+
|
5
|
+
## [Unreleased]
|
6
|
+
|
7
|
+
## [0.3.1]
|
8
|
+
### Added
|
9
|
+
- New module `reporting/json` to export data from an SQL query to a file in JSON format
|
10
|
+
- New module `reporting/csv` to export data from an SQL query to a file in CSV format
|
11
|
+
- This changelog
|
12
|
+
|
13
|
+
### Changed
|
14
|
+
- `posted_at` column added to `urls` for better and easier ordering
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
- make `status search` command case insensitive
|
@@ -66,13 +66,13 @@ USAGE
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def search_statuses
|
69
|
-
search_term = arguments[1..-1].join(" ")
|
69
|
+
search_term = arguments[1..-1].join(" ").downcase
|
70
70
|
if search_term.empty?
|
71
71
|
error("You must provide a search term")
|
72
72
|
return false
|
73
73
|
end
|
74
74
|
statuses = current_workspace.statuses_dataset
|
75
|
-
.where("text LIKE ?", "%#{search_term}%")
|
75
|
+
.where("lower(text) LIKE ?", "%#{search_term}%")
|
76
76
|
.order(Sequel.desc(:posted_at))
|
77
77
|
.eager(:user)
|
78
78
|
.limit(1000)
|
@@ -61,7 +61,7 @@ module Birdwatcher
|
|
61
61
|
if status.urls?
|
62
62
|
status.urls.each do |url|
|
63
63
|
expanded_url = Birdwatcher::Util.strip_control_characters(url.expanded_url.to_s)
|
64
|
-
db_url = current_workspace.urls_dataset.first(:url => expanded_url) || current_workspace.add_url(:url => expanded_url)
|
64
|
+
db_url = current_workspace.urls_dataset.first(:url => expanded_url) || current_workspace.add_url(:url => expanded_url, :posted_at => status.created_at)
|
65
65
|
db_status.add_url(db_url)
|
66
66
|
end
|
67
67
|
end
|
@@ -114,6 +114,25 @@ module Birdwatcher
|
|
114
114
|
Birdwatcher::Util.excerpt(text, max_length, omission)
|
115
115
|
end
|
116
116
|
|
117
|
+
# Create a more human readable representation of a number
|
118
|
+
#
|
119
|
+
# @param number [Numeric] number to make human readable
|
120
|
+
#
|
121
|
+
# @example
|
122
|
+
# number_to_human_size(1024)
|
123
|
+
# #=> "1KB"
|
124
|
+
#
|
125
|
+
# number_to_human_size(2097152)
|
126
|
+
# #=> "2MB"
|
127
|
+
#
|
128
|
+
# number_to_human_size(3650722201)
|
129
|
+
# #=> "3.4GB"
|
130
|
+
#
|
131
|
+
# @return [String] Human readable number
|
132
|
+
def number_to_human_size(number)
  # Thin convenience wrapper: the actual formatting is implemented by
  # Birdwatcher::Util.number_to_human_size; the argument is passed through
  # untouched and that method's return value is returned as-is.
  formatter = Birdwatcher::Util
  formatter.number_to_human_size(number)
end
|
135
|
+
|
117
136
|
# Suppress any potential output to STDOUT
|
118
137
|
#
|
119
138
|
# Used in cases where certain libraries or methods might output unwanted
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Birdwatcher
  module Modules
    module Reporting
      # Reporting module that executes the SQL query given in the QUERY option
      # and writes the resulting rows to the file named by the DEST option in
      # CSV format.
      class Csv < Birdwatcher::Module
        self.meta = {
          name: "CSV Exporter",
          description: "Export result from SQL query to a CSV file",
          author: "Michael Henriksen <michenriksen@neomailbox.ch>",
          options: {
            "DEST" => { value: nil, description: "Destination file", required: true },
            "QUERY" => { value: nil, description: "SQL query to execute", required: true },
            "HEADERS" => {
              value: true,
              description: "Add CSV headers to the file",
              required: false,
              boolean: true
            }
          }
        }

        # Long-form help text for the module.
        #
        # @return [String]
        def self.info
          <<-INFO
The CSV exporter can write the results of an SQL query to a file in CSV format.

#{'IMPORTANT:'.bold} The module does not limit the data returned from the query
to the currently active workspace, the query will need to take that in to
consideration if necessary.
INFO
        end

        # Run the query, render the rows as CSV and write them to DEST.
        #
        # @return [false] when the query raises Sequel::DatabaseError;
        #   otherwise whatever the final +info+ call returns
        def run
          dest_path = option_setting("DEST")
          # Declared up front so the task blocks below assign to these
          # method-level locals instead of creating block-local ones.
          records = nil
          column_names = nil
          document = nil

          task("Executing SQL query...") do
            begin
              dataset = database[option_setting("QUERY")]
              records = dataset.map { |record| record.to_hash.values }
              column_names = dataset.columns.map(&:to_s)
            rescue Sequel::DatabaseError => e
              error("Syntax error: #{e.message}")
              # `return` inside the block leaves #run itself, not just the block.
              return false
            end
          end

          task("Generating CSV...") do
            document = CSV.generate(write_headers: option_setting("HEADERS"), headers: column_names) do |out|
              records.each { |values| out << values }
            end
          end

          task("Writing #{pluralize(records.count, 'row', 'rows')} to file...") do
            File.open(dest_path, "w") { |file| file.write(document) }
          end

          written = number_to_human_size(File.size(dest_path))
          info("Wrote #{written.bold} to #{dest_path.bold}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Birdwatcher
  module Modules
    module Reporting
      # Reporting module that executes the SQL query given in the QUERY option
      # and writes the resulting rows to the file named by the DEST option in
      # JSON format.
      class Json < Birdwatcher::Module
        self.meta = {
          name: "JSON Exporter",
          description: "Export result from SQL query to a JSON file",
          author: "Michael Henriksen <michenriksen@neomailbox.ch>",
          options: {
            "DEST" => { value: nil, description: "Destination file", required: true },
            "QUERY" => { value: nil, description: "SQL query to execute", required: true },
            "PRETTY_FORMATTING" => {
              value: false,
              description: "Output pretty formatted JSON",
              required: false,
              boolean: true
            }
          }
        }

        # Long-form help text for the module.
        #
        # @return [String]
        def self.info
          <<-INFO
The JSON exporter can write the results of an SQL query to a file in JSON format.

#{'IMPORTANT:'.bold} The module does not limit the data returned from the query
to the currently active workspace, the query will need to take that in to
consideration if necessary.
INFO
        end

        # Run the query, serialize the rows as JSON and write them to DEST.
        #
        # @return [false] when the query raises Sequel::DatabaseError;
        #   otherwise whatever the final +info+ call returns
        def run
          dest_path = option_setting("DEST")
          # Declared up front so the task blocks below assign to these
          # method-level locals instead of creating block-local ones.
          records = nil
          payload = nil

          task("Executing SQL query...") do
            begin
              dataset = database[option_setting("QUERY")]
              records = dataset.map { |record| record.to_hash }
            rescue Sequel::DatabaseError => e
              error("Syntax error: #{e.message}")
              # `return` inside the block leaves #run itself, not just the block.
              return false
            end
          end

          task("Generating JSON...") do
            payload = option_setting("PRETTY_FORMATTING") ? JSON.pretty_generate(records) : JSON.generate(records)
          end

          task("Writing #{pluralize(records.count, 'row', 'rows')} to file...") do
            File.open(dest_path, "w") { |file| file.write(payload) }
          end

          written = number_to_human_size(File.size(dest_path))
          info("Wrote #{written.bold} to #{dest_path.bold}")
        end
      end
    end
  end
end
|
@@ -80,11 +80,11 @@ INFO
|
|
80
80
|
if option_setting("RETRY_FAILED")
|
81
81
|
urls = current_workspace.urls_dataset
|
82
82
|
.where("crawled_at IS NULL or (crawled_at IS NOT NULL AND http_status IS NULL)")
|
83
|
-
.order(Sequel.desc(:
|
83
|
+
.order(Sequel.desc(:posted_at))
|
84
84
|
else
|
85
85
|
urls = current_workspace.urls_dataset
|
86
86
|
.where(:crawled_at => nil)
|
87
|
-
.order(Sequel.desc(:
|
87
|
+
.order(Sequel.desc(:posted_at))
|
88
88
|
end
|
89
89
|
if urls.empty?
|
90
90
|
error("There are currently no URLs in this workspace")
|
data/lib/birdwatcher/util.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Birdwatcher
|
2
2
|
module Util
|
3
|
+
HUMAN_PREFIXES = %w(TB GB MB KB B).freeze
|
4
|
+
|
3
5
|
def self.time_ago_in_words(time)
|
4
6
|
return "a very very long time ago" if time.year < 1800
|
5
7
|
secs = Time.now - time
|
@@ -61,6 +63,16 @@ module Birdwatcher
|
|
61
63
|
text[0..max_length] + omission
|
62
64
|
end
|
63
65
|
|
66
|
+
# Render a byte count as a short human readable string such as "1KB" or
# "3.4GB".
#
# The value is divided by 1024 and moved one unit up HUMAN_PREFIXES (which is
# ordered largest unit first) for as long as it is above 512 and a larger
# unit remains. The decimal is dropped when the scaled value is above 9 or
# its fractional part is below 0.1; otherwise one decimal is shown.
#
# @param number [Numeric] size in bytes
# @return [String] scaled value immediately followed by its unit
def self.number_to_human_size(number)
  size = number.to_f
  unit_index = HUMAN_PREFIXES.length - 1 # start at the smallest unit ("B")

  while unit_index > 0 && size > 512
    unit_index -= 1
    size /= 1024
  end

  drop_decimal = size > 9 || size.modulo(1) < 0.1
  format(drop_decimal ? "%d" : "%.1f", size) + HUMAN_PREFIXES[unit_index]
end
|
75
|
+
|
64
76
|
def self.suppress_output(&block)
|
65
77
|
original_stdout = $stdout
|
66
78
|
$stdout = fake = StringIO.new
|
data/lib/birdwatcher/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: birdwatcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Henrikesn
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -296,6 +296,7 @@ extra_rdoc_files: []
|
|
296
296
|
files:
|
297
297
|
- ".gitignore"
|
298
298
|
- ".travis.yml"
|
299
|
+
- CHANGELOG.md
|
299
300
|
- Gemfile
|
300
301
|
- LICENSE.txt
|
301
302
|
- README.md
|
@@ -319,6 +320,7 @@ files:
|
|
319
320
|
- db/migrations/013_create_influencers_users.rb
|
320
321
|
- db/migrations/014_create_influencees.rb
|
321
322
|
- db/migrations/015_create_influencees_users.rb
|
323
|
+
- db/migrations/016_add_posted_at_to_urls.rb
|
322
324
|
- exe/birdwatcher
|
323
325
|
- lib/birdwatcher.rb
|
324
326
|
- lib/birdwatcher/command.rb
|
@@ -353,6 +355,8 @@ files:
|
|
353
355
|
- lib/birdwatcher/klout_client.rb
|
354
356
|
- lib/birdwatcher/kml.rb
|
355
357
|
- lib/birdwatcher/module.rb
|
358
|
+
- lib/birdwatcher/modules/reporting/csv.rb
|
359
|
+
- lib/birdwatcher/modules/reporting/json.rb
|
356
360
|
- lib/birdwatcher/modules/statuses/kml.rb
|
357
361
|
- lib/birdwatcher/modules/statuses/sentiment.rb
|
358
362
|
- lib/birdwatcher/modules/statuses/word_cloud.rb
|