kishu 0.0.1 → 1.0.0
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +15 -5
- data/README.md +28 -2
- data/lib/kishu.rb +1 -1
- data/lib/kishu/client.rb +2 -3
- data/lib/kishu/report.rb +58 -23
- data/lib/kishu/resolution_event.rb +1 -1
- data/lib/kishu/sushi.rb +46 -28
- data/lib/kishu/usage_event.rb +103 -80
- data/lib/kishu/version.rb +1 -1
- data/spec/report_spec.rb +72 -59
- data/spec/resolution_event_spec.rb +53 -53
- metadata +4 -10
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 45d4e6ad0c84d9c3dfaef319fb4321bbaf9d6623b8ef810420af2b1e0779fea8
+  data.tar.gz: 9672d38f55253112c443b17da56e35c7469211ee9a90f9dbc8844e1a4bf67eae
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7727dafcfe5f6fbda41a4a5d1d634419e6fdd23afb122c6452c9156e8644bef6999d1527ba74f42237a9f5ee0cf9388fb8231bcea35b99fce70e3a32c716fb28
+  data.tar.gz: 0dd429d67bae7d4557a04d2b4d4e19d729383571b33d6340dadb9220734888214ede9bac1fa37e9f18e1dc53eec07017f30b13539b899c98b0d75256ae931340
data/.gitignore CHANGED
data/.travis.yml CHANGED

@@ -1,7 +1,17 @@
----
-sudo: false
 language: ruby
-cache: bundler
 rvm:
-
-
+- 2.3.1
+install:
+- travis_retry bundle install
+# script: bundle exec rspec
+notifications:
+  slack: "$SLACK_TOKEN"
+  email: false
+deploy:
+  provider: rubygems
+  api_key:
+    secure: "$RUBYGEMS_KEY"
+  gem: kishu
+  on:
+    tags: true
+    repo: datacite/kishu
data/README.md CHANGED

@@ -22,7 +22,33 @@ Or install it yourself as:
 
 ## Usage
 
-
+
+First set up your JWT and the host to send reports to:
+
+```
+export HUB_TOKEN="nsdfkhdfs89745fdfdsDFSFDS"
+export HUB_URL="https://api.test.datacite.org"
+
+```
+
+You can generate a usage report locally with:
+
+```shell
+kishu sushi generate created_by:{YOUR DATACITE CLIENT ID}
+```
+
+To generate and push a usage report in JSON format following the Code of Practice for Usage Metrics, you can use the following command.
+
+```shell
+kishu sushi push created_by:{YOUR DATACITE CLIENT ID}
+```
+
+To stream a usage report in JSON format following the Code of Practice for Usage Metrics, you can use the following command. This option should only be used for reports with more than 50,000 datasets or larger than 10 MB. We compress all reports that are streamed to the MDC Hub.
+
+```shell
+kishu sushi stream created_by:{YOUR DATACITE CLIENT ID}
+```
+
 
 ## Development
 
@@ -36,7 +62,7 @@ Follow along via [Github Issues](https://github.com/datacite/kishu/issues).
 
 ## Contributing
 
-Bug reports and pull requests are welcome on GitHub at https://github.com/
+Bug reports and pull requests are welcome on GitHub at https://github.com/datacite/kishu. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
 
 ## License
 
data/lib/kishu.rb CHANGED

@@ -27,4 +27,4 @@ AWS_ACCESS_KEY_ID = ENV['AWS_ACCESS_KEY_ID'] ? ENV['AWS_ACCESS_KEY_ID']
 AWS_SECRET_ACCESS_KEY = ENV['AWS_SECRET_ACCESS_KEY'] ? ENV['AWS_SECRET_ACCESS_KEY'] : ""
 ELASTIC_PASSWORD = ENV['ELASTIC_PASSWORD'] ? ENV['ELASTIC_PASSWORD'] : ""
 LOGS_TAG = "[Resolution Logs]"
-
+LOGGER = Logger.new(STDOUT)
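The constants here use the `ENV['X'] ? ENV['X'] : ""` ternary to fall back to an empty string when a variable is unset. A minimal sketch of the same behavior, assuming nothing beyond the Ruby standard library; `ENV.fetch` with a default is the more idiomatic spelling:

```ruby
require "logger"

# Equivalent to ENV['ELASTIC_PASSWORD'] ? ENV['ELASTIC_PASSWORD'] : ""
ELASTIC_PASSWORD = ENV.fetch("ELASTIC_PASSWORD", "")
LOGGER = Logger.new(STDOUT)

LOGGER.info "Elasticsearch password configured: #{!ELASTIC_PASSWORD.empty?}"
```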
data/lib/kishu/client.rb CHANGED

@@ -11,7 +11,7 @@ module Kishu
   def initialize
 
     if ES_HOST == "localhost:9200" || ES_HOST == "elasticsearch:9200"
-      @client = Elasticsearch::Client.new(host: ES_HOST, user: "elastic", password: ELASTIC_PASSWORD, transport_options: { request: { timeout: 3600
+      @client = Elasticsearch::Client.new(host: ES_HOST, user: "elastic", password: ELASTIC_PASSWORD, transport_options: { request: { timeout: 3600}}) do |f|
        f.adapter Faraday.default_adapter
      end
    else
@@ -62,8 +62,7 @@ module Kishu
         query_string: {
           query: "*"
         }
-      }
-      aggregations: aggregations(options)
+      }
       },
       index: "resolutions"
     ).dig("hits","hits",0,"_source","logdate")
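`Report#get_events` (below) pages through these aggregations with an `after_key`, which is how Elasticsearch composite aggregations paginate. A minimal sketch of that loop, assuming the elasticsearch-ruby client; the `resolutions` index and `doi` bucket names follow the diff, while the exact aggregation body here is an assumption:

```ruby
require "elasticsearch"

client = Elasticsearch::Client.new(host: "localhost:9200")

after_key = nil
loop do
  composite = { size: 500, sources: [{ doi: { terms: { field: "doi" } } }] }
  composite[:after] = after_key if after_key   # resume from the previous page
  response = client.search(
    index: "resolutions",
    body: { size: 0, aggregations: { doi: { composite: composite } } }
  )
  buckets = response.dig("aggregations", "doi", "buckets")
  # ... wrap each bucket into a usage or resolution event here ...
  after_key = response.dig("aggregations", "doi", "after_key")
  break if after_key.nil?   # no after_key means this was the last page
end
```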
data/lib/kishu/report.rb CHANGED

@@ -13,6 +13,7 @@ module Kishu
 
   include Kishu::Base
   include Kishu::Utils
+  attr_reader :uid, :total, :period
 
   def initialize options={}
     set_period
@@ -21,8 +22,17 @@ module Kishu
     @report_id = options[:report_id] ? options[:report_id] : ""
     @total = 0
     @aggs_size = options[:aggs_size]
-    @chunk_size = options[:chunk_size]
     @after = options[:after_key] ||=""
+    @enrich = options[:enrich]
+    @schema = options[:schema]
+    @encoding = options[:encoding]
+    @created_by = options[:created_by]
+    @report_size = options[:report_size]
+    if @schema == "resolution"
+      @enrich = false
+      @encoding = "gzip"
+      # @report_size = 20000
+    end
   end
 
   def report_period options={}
@@ -37,10 +47,13 @@ module Kishu
   def get_events options={}
     logger = Logger.new(STDOUT)
     es_client = Client.new()
-    response = es_client.get({aggs_size: @aggs_size ||
+    response = es_client.get({aggs_size: @aggs_size || 400, after_key: options[:after_key] ||=""})
     aggs = response.dig("aggregations","doi","buckets")
     x = aggs.map do |agg|
-
+      case @schema
+      when "resolution" then ResolutionEvent.new(agg,{period: @period, report_id: @report_id}).wrap_event
+      when "usage" then UsageEvent.new(agg,{period: @period, report_id: @report_id}).wrap_event
+      end
     end
     after = response.dig("aggregations","doi").fetch("after_key",{"doi"=>nil}).dig("doi")
     logger.info "After_key for pagination #{after}"
@@ -56,13 +69,24 @@ module Kishu
       @datasets = @datasets.concat response[:data]
       @after = response[:after]
       @total += @datasets.size
-      generate_chunk_report if @datasets.size > @
+      generate_chunk_report if @datasets.size > @report_size
+      break if @after.nil?
+    end
+  end
+
+  def generate
+    @datasets = []
+    loop do
+      response = get_events({after_key: @after ||=""})
+      @datasets = @datasets.concat response[:data]
+      @after = response[:after]
+      @total += @datasets.size
+      break if @total > 40000
       break if @after.nil?
     end
   end
 
   def compress report
-    # report = File.read(hash)
     gzip = Zlib::GzipWriter.new(StringIO.new)
     string = report.to_json
     gzip << string
@@ -82,17 +106,12 @@ module Kishu
     @datasets = []
   end
 
-  def make_report options={}
-    generate_dataset_array
-    @logger.info "#{LOGS_TAG} Month of #{@period.dig("begin-date")} sent to Hub in report #{@uid} with stats for #{@total} datasets"
-  end
-
 
   def set_period
-    report_period
+    rp = report_period
     @period = {
-      "begin-date": Date.civil(
-      "end-date": Date.civil(
+      "begin-date": Date.civil(rp.year, rp.mon, 1).strftime("%Y-%m-%d"),
+      "end-date": Date.civil(rp.year, rp.mon, -1).strftime("%Y-%m-%d"),
     }
   end
 
@@ -100,13 +119,22 @@ module Kishu
     uri = HUB_URL+'/reports'
     puts uri
 
-
-
-
-
-
-
-
+    case @encoding
+    when "gzip" then
+      headers = {
+        content_type: "application/gzip",
+        content_encoding: 'gzip',
+        accept: 'gzip'
+      }
+      body = compress(report)
+    when "json" then
+      headers = {
+        content_type: "application/json",
+        accept: 'application/json'
+      }
+      body = report
+    end
+
     n = 0
     loop do
       request = Maremma.post(uri, data: body,
@@ -114,6 +142,9 @@ module Kishu
                 headers: headers,
                 timeout: 100)
 
+      puts body
+      puts headers
+
       @uid = request.body.dig("data","report","id")
       @logger.info "#{LOGS_TAG} Hub response #{request.status} for Report finishing in #{@after}"
       @logger.info "#{LOGS_TAG} Hub response #{@uid} for Report finishing in #{@after}"
@@ -132,12 +163,16 @@ module Kishu
     end
 
   def get_header
+    report_type = case @schema
+      when "resolution" then {release:"drl", title:"resolution report"}
+      when "usage" then {release:"rd1", title:"usage report"}
+    end
     {
-      "report-name":
+      "report-name": report_type.dig(:title),
       "report-id": "dsr",
-      release:
+      release: report_type.dig(:release),
       created: Date.today.strftime("%Y-%m-%d"),
-      "created-by":
+      "created-by": @created_by,
       "reporting-period": @period,
       "report-filters": [],
       "report-attributes": [],
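For reference on the `compress` method above: a minimal sketch, assuming the report is a plain Hash, of how `Zlib::GzipWriter` over a `StringIO` turns the JSON into a gzipped request body:

```ruby
require "zlib"
require "stringio"
require "json"

# Gzip a report hash in memory, mirroring the StringIO-backed
# Zlib::GzipWriter approach in the diff.
def compress(report)
  io = StringIO.new
  gzip = Zlib::GzipWriter.new(io)
  gzip << report.to_json
  gzip.close        # flushes and finalizes the gzip stream
  io.string         # compressed bytes, ready to POST
end

puts compress({ "report-header" => { "release" => "rd1" } }).bytesize
```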
data/lib/kishu/sushi.rb CHANGED

@@ -14,24 +14,6 @@ module Kishu
   include Kishu::Utils
 
 
-  desc "get sushi", "get resolution report"
-  # method_option :username, :default => ENV['MDS_USERNAME']
-  method_option :aggs_size, :type => :numeric, :default => 1000
-  method_option :month_year, :type => :string, :default => "2018-04"
-  def get
-    x =Report.new()
-    x.make_report(options)
-
-  end
-
-  method_option :month_year, :type => :string, :default => "2018-04"
-  method_option :after_key, :type => :string
-  def continue_report
-    x =Report.new()
-    x.generate_files(options)
-
-  end
-
   desc "clean_all sushi", "clean index"
   method_option :month_year, :type => :string, :default => "2018-04"
   method_option :after_key, :type => :string
@@ -41,19 +23,55 @@ module Kishu
 
   end
 
+  desc "stream a sushi", "stream report"
+  method_option :month_year, :type => :string, :default => "2018-04"
+  method_option :after_key, :type => :string
+  method_option :report_size, :type => :numeric, :default => 40000
+  method_option :aggs_size, :type => :numeric, :default => 500
+  method_option :schema, :type => :string, :default => "usage"
+  method_option :enrich, :type => :boolean, :default => false
+  method_option :encoding, :type => :string, :default => "gzip"
+  method_option :created_by, :type => :string, :default => "datacite"
+  def stream
+    fail "You need to set your JWT" if HUB_TOKEN.blank?
+    report = Report.new(options)
+    report.generate_dataset_array
+    LOGGER.info "#{LOGS_TAG} Month of #{report.period.dig("begin-date")} sent to Hub in report #{report.uid} with stats for #{report.total} datasets"
+  end
 
-  desc "
-  method_option :
-  method_option :
-  method_option :
-  method_option :
-  def
+  desc "generate a sushi", "generate report"
+  method_option :schema, :type => :string, :default => "usage"
+  method_option :enrich, :type => :boolean, :default => true
+  method_option :encoding, :type => :string, :default => "json"
+  method_option :created_by, :type => :string, :default => "datacite"
+  def generate
+    report = Report.new(options)
+    report.generate
+    file = report.merged_file
+    File.open(file,"w") do |f|
+      f.write(JSON.pretty_generate report.get_template)
+    end
+    LOGGER.info "#{LOGS_TAG} Month of #{report.period.dig("begin-date")} with stats for #{report.total} datasets"
+  end
+
+  desc "push a sushi", "push report"
+  method_option :schema, :type => :string, :default => "usage"
+  method_option :enrich, :type => :boolean, :default => true
+  method_option :encoding, :type => :string, :default => "json"
+  method_option :created_by, :type => :string, :default => "datacite"
+  def push
     fail "You need to set your JWT" if HUB_TOKEN.blank?
-
-
-
+    report = Report.new(options)
+    report.generate
+    report.send_report report.get_template
+    LOGGER.info "#{LOGS_TAG} Month of #{report.period.dig("begin-date")} sent to Hub in report #{report.uid} with stats for #{report.total} datasets"
+  end
+
+  desc "is ES running", "check es is working"
+  def elasticsearch_results
+    es = Client.get({aggs_size: 10, after_key: ""})
+    puts es
   end
 
-
 end
 end
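The commands above are Thor tasks: each `method_option` becomes a `--flag` with a default, and `options` is populated at invocation time. A minimal stand-alone sketch of the pattern (a hypothetical script, not part of the gem):

```ruby
require "thor"

# Hypothetical stand-alone example of the Thor command style used in sushi.rb.
class Sushi < Thor
  desc "push", "push report"
  method_option :schema, :type => :string, :default => "usage"
  method_option :encoding, :type => :string, :default => "json"
  def push
    # Thor fills `options` from the command line, e.g. --schema resolution
    puts "pushing a #{options[:schema]} report encoded as #{options[:encoding]}"
  end
end

Sushi.start(ARGV)
```

Run as, say, `ruby sushi.rb push --schema resolution --encoding gzip`.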
data/lib/kishu/usage_event.rb CHANGED

@@ -10,98 +10,114 @@ module Kishu
 
   API_URL = "https://api.datacite.org"
 
-  def wrap_event(event)
-
-
-    # puts event.dig("unique").fetch("buckets", nil)
-    unique = event.dig("unique").fetch("buckets", [])
-    # puts unique[1].dig('key')
+  def wrap_event(event, options={})
+    totale_investigations = event.dig("totale").fetch("buckets", [])
+    unique_investigations = event.dig("unique").fetch("buckets", [])
 
-
-
-
-
+    unique_regular_investigations = unique_investigations.find_all {|access_method| access_method.fetch('key',"").match('regular') }
+    unique_machine_investigations = unique_investigations.find_all {|access_method| access_method.fetch('key',"").match('machine') }
+    total_regular_investigations = totale_investigations.find_all {|access_method| access_method.fetch('key',"").match('regular') }
+    total_machine_investigations = totale_investigations.find_all {|access_method| access_method.fetch('key',"").match('machine') }
+
+    totale_requests = event.dig("totale").fetch("buckets", [])
+    unique_requests = event.dig("unique").fetch("buckets", [])
+
+    unique_regular_requests = unique_requests.find_all {|access_method| access_method.fetch('key',"").match('regular') }
+    unique_machine_requests = unique_requests.find_all {|access_method| access_method.fetch('key',"").match('machine') }
+    total_regular_requests = totale_requests.find_all {|access_method| access_method.fetch('key',"").match('regular') }
+    total_machine_requests = totale_requests.find_all {|access_method| access_method.fetch('key',"").match('machine') }
 
     dataset = {
       doi: event.dig("key","doi"),
-
-
-
-
+      unique_counts_regular_investigations: unique_regular_investigations.empty? ? 0 : unique_regular_investigations.size,
+      unique_counts_machine_investigations: unique_machine_investigations.empty? ? 0 : unique_machine_investigations.size,
+      total_counts_regular_investigations: total_regular_investigations.empty? ? 0 : total_regular_investigations.dig(0,"doc_count"),
+      total_counts_machine_investigations: total_machine_investigations.empty? ? 0 : total_machine_investigations.dig(0,"doc_count"),
+      unique_counts_regular_requests: unique_regular_requests.empty? ? 0 : unique_regular_requests.size,
+      unique_counts_machine_requests: unique_machine_requests.empty? ? 0 : unique_machine_requests.size,
+      total_counts_regular_requests: total_regular_requests.empty? ? 0 : total_regular_requests.dig(0,"doc_count"),
+      total_counts_machine_requests: total_machine_requests.empty? ? 0 : total_machine_requests.dig(0,"doc_count")
     }
 
-
     # conn = Faraday.new(:url => API_URL)
     logger = Logger.new(STDOUT)
     logger.info event.fetch("doc_count")
 
-
-    logger.info dataset
-    doi = dataset.fetch(:doi,nil)
-    # json = conn.get "/works/#{doi}"
-    # json = conn.get do |req|
-    #   req.url "/works/#{doi}"
-    #   req.options.timeout = 50 # open/read timeout in seconds
-    #   req.options.open_timeout = 20 # connection open timeout in seconds
-    # end
-    # json = Maremma.get "#{API_URL}/works/#{doi}"
-    # logger.info json.status
+    logger.info dataset
 
-
-
-
-    # data = json.body
-    data = {}
-    instances =[
-      {
-        count: dataset.fetch(:total_counts_regular),
-        "access-method": "regular",
-        "metric-type": "total-resolutions"
-      },
-      {
-        count: dataset.fetch(:unique_counts_regular),
-        "access-method": "regular",
-        "metric-type": "unique-resolutions"
-      },
-      {
-        count: dataset.fetch(:unique_counts_machine),
-        "access-method": "machine",
-        "metric-type": "unique-resolutions"
-      },
-      {
-        count: dataset.fetch(:total_counts_machine),
-        "access-method": "machine",
-        "metric-type": "total-resolutions"
-      },
-    ]
-
-    instances.delete_if {|instance| instance.dig(:count) <= 0}
-    attributes = {} #data.dig("data","attributes")
-    resource_type = "" #attributes.fetch("resource-type-id",nil).nil? ? "dataset" : attributes.fetch("resource-type-id",nil)
+    doi = dataset.fetch(:doi,nil)
+
+    data = get_metadata doi
 
-
-
-    "
-
-
-
-
-    "
-
-
-
-
-
-
-
-
-
-
-
-
+    instances =[
+      {
+        "count": dataset[:total_counts_regular_investigations],
+        "access-method": "regular",
+        "metric-type": "total_dataset_investigations"
+      },
+      {
+        "count": dataset[:unique_counts_regular_investigations],
+        "access-method": "regular",
+        "metric-type": "unique_dataset_investigations"
+      },
+      {
+        "count": dataset[:unique_counts_machine_investigations],
+        "access-method": "machine",
+        "metric-type": "unique_dataset_investigations"
+      },
+      {
+        "count": dataset[:total_counts_machine_investigations],
+        "access-method": "machine",
+        "metric-type": "total_dataset_investigations"
+      },
+      {
+        "count": dataset[:total_counts_regular],
+        "access-method": "regular",
+        "metric-type": "total_dataset_requests"
+      },
+      {
+        "count": dataset[:unique_counts_regular],
+        "access-method": "regular",
+        "metric-type": "unique_dataset_requests"
+      },
+      {
+        "count": dataset[:unique_counts_machine],
+        "access-method": "machine",
+        "metric-type": "unique_dataset_requests"
+      },
+      {
+        "count": dataset[:total_counts_machine],
+        "access-method": "machine",
+        "metric-type": "total_dataset_requests"
       }
-
+    ]
+    instances.delete_if {|instance| instance.dig(:count) <= 0}
+    attributes = data.dig("data","attributes")
+    resource_type = attributes.fetch("resource-type-id",nil).nil? ? "dataset" : attributes.fetch("resource-type-id",nil)
+
+    instanced = {
+      "dataset-id" => [{type: "doi", value: dataset.fetch(:doi,nil)}],
+      "data-type" => resource_type,
+      "yop" => attributes.fetch("published",nil),
+      "uri" => attributes.fetch("identifier",nil),
+      "publisher" => attributes.fetch("container-title",nil),
+      "dataset-title": attributes.fetch("title",nil),
+      "publisher-id": [{
+        "type" => "client-id",
+        "value" => attributes.fetch("data-center-id",nil)
+      }],
+      "dataset-dates": [{
+        "type" => "pub-date",
+        "value" => attributes.fetch("published",nil)
+      }],
+      "dataset-contributors": attributes.fetch("author",[]).map { |a| get_authors(a) },
+      "platform" => attributes.fetch("data-center-id",nil),
+      "performance" => [{
+        "period" => @period,
+        "instance" => instances
+      }]
+    }
+    logger.info instanced
 
     instanced
   end
@@ -119,6 +135,13 @@ module Kishu
       value: "" }
     end
   end
-
+
+  def get_metadata doi
+    json = Maremma.get "#{API_URL}/works/#{doi}"
+    logger.info json.status
+    return {} unless json.status == 200
+    logger.info "Success on getting metadata for #{doi}"
+    JSON.parse(json.body)
+  end
 end
 end
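The heart of `wrap_event` above is splitting the composite buckets by access method via the `_regular` / `_machine` suffix in each bucket key, then turning counts into COUNTER-style metric instances. A minimal sketch of that transformation with made-up bucket data; the key pattern follows the diff, the rest is illustrative:

```ruby
# Made-up buckets in the shape the aggregation returns.
buckets = [
  { "key" => "10.5072/example-doi_regular", "doc_count" => 7 },
  { "key" => "10.5072/example-doi_machine", "doc_count" => 2 }
]

instances = buckets.map do |bucket|
  access_method = bucket["key"].match("machine") ? "machine" : "regular"
  { "count" => bucket["doc_count"],
    "access-method" => access_method,
    "metric-type" => "total_dataset_investigations" }
end

# Drop empty metrics, as wrap_event does.
instances.delete_if { |instance| instance["count"] <= 0 }
puts instances
```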
data/lib/kishu/version.rb CHANGED
data/spec/report_spec.rb CHANGED

@@ -3,33 +3,7 @@ require 'spec_helper'
 
 describe Kishu::Report, vcr: true, :order => :defined do
   let(:report) {Kishu::Report.new()}
-
-  context "when doi doesn't exist" do
-
-    it "should fail" do
-      # response = subject.get_events
-      # expect(response).to be({})
-    end
-  end
-end
-
-
-describe "get_events" do
-  context "" do
-    it "should return the data for one message" do
-
-    end
-  end
-end
-
-describe "generate_dataset_array" do
-  context "" do
-    it "" do
-
-    end
-  end
-end
-
+
 
   describe "compress" do
     context "when json arrives compresses it correctly" do
@@ -39,38 +13,77 @@ describe Kishu::Report, vcr: true, :order => :defined do
     end
   end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  # describe "get_template" do
+  #   context "when Godd Usage report" do
+  #     let(:usage_params) {{encoding:"json",schema:"usage",enrich:true,created_by:"DataOne"}}
+  #     let(:datasets) {fixture_file("usage_datasest_array.json")}
+  #     let(:period) {{}}
+  #     it "generate a good template" do
+  #       response = Report.new(usage_params)
+  #       response.datasets = datasets
+  #       expect(response.get_template.dig("report-header","release")).to eq("rd1")
+  #       expect(response.get_template.dig("report-header","created_by")).to eq("DataOne")
+  #       expect(response.get_template.dig("report-datasets").size).to eq(10)
+  #     end
+  #   end
+
+  #   context "when Godd Resolution report" do
+  #     let(:resolution_params) {{encoding:"json",schema:"resolution",enrich:false,created_by:"Dash"}}
+  #     let(:datasets) {fixture_file("usage_datasest_array.json")}
+  #     let(:period) {{}}
+  #     it "generate a good template" do
+  #       response = Report.new(resolution_params)
+  #       response.datasets = datasets
+  #       expect(response.get_template.dig("report-header","release")).to eq("dlr")
+  #       expect(response.get_template.dig("report-header","created_by")).to eq("Dash")
+  #       expect(response.get_template.dig("report-datasets").size).to eq(10)
+  #     end
+  #   end
+  #   context "when Bad Resolution report" do
+  #     let(:resolution_params) {{encoding:"json",schema:"resolution",enrich:false,created_by:"Dash"}}
+  #     let(:datasets) {fixture_file("usage_datasest_array.json")}
+  #     let(:period) {{}}
+  #     it "generate a good template" do
+  #       response = Report.new(resolution_params)
+  #       response.datasets = datasets
+  #       expect(response.get_template.dig("report-header","release")).to eq("dlr")
+  #       expect(response.get_template.dig("report-header","created_by")).to eq("Dash")
+  #       expect(response.get_template.dig("report-datasets").size).to eq(10)
+  #     end
+  #   end
+  # end
+
+  # describe "send_report" do
+  #   context "when Godd Usage report" do
+  #     let(:usage_params) {{encoding:"json",schema:"usage",enrich:true,created_by:"DataOne"}}
+  #     let(:datasets) {fixture_file("usage_datasest_array.json")}
+  #     let(:period) {{}}
+  #     it "return 200" do
+  #       report = Report.new(usage_params)
+  #       report.datasets = datasets
+  #       response = report.send_report
+  #       expect(response.status).to eq(201)
+  #     end
+  #   end
+  #   context "when Godd Resolution report" do
+  #     let(:resolution_params) {{encoding:"json",schema:"resolution",enrich:false,created_by:"datacite"}}
+  #     let(:datasets) {fixture_file("usage_datasest_array.json")}
+  #     let(:period) {{}}
+  #     it "return 200" do
+  #       report = Report.new(resolution_params)
+  #       report.datasets = datasets
+  #       response = report.send_report
+  #       expect(response.status).to eq(201)
+  #     end
+  #   end
+
+  #   context "when the report is bad" do
+  #     it "return error" do
+  #       # status = Report.send_report report
+  #       # expect(status).not_to eq("201")
+  #     end
+  #   end
+  # end
 
 end
 
data/spec/resolution_event_spec.rb CHANGED

@@ -3,59 +3,59 @@ require 'spec_helper'
 
 describe Kishu::ResolutionEvent, vcr: true, :order => :defined do
 
-  describe "wrap_event" do
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  end
+  # describe "wrap_event" do
+  #   context "when doi exist" do
+  #     let(:event){create(:resolution_event)}
+  #     it "should wrap correctly" do
+  #       puts event.dig(:totale)
+  #       response = subject.wrap_event(event)
+  #       expect(response).to eq({})
+  #     end
+  #   end
+  #   context "when doi has not type assigned" do
+  #     let(:event) {{
+  #       "key": {
+  #         "doi": "10.13130//3192"
+  #       },
+  #       "doc_count": 2,
+  #       "totale": {
+  #         "doc_count_error_upper_bound": 0,
+  #         "sum_other_doc_count": 0,
+  #         "buckets": [
+  #           {
+  #             "key": "10.13130//3192_regular",
+  #             "doc_count": 2
+  #           }
+  #         ]
+  #       },
+  #       "unqiue": {
+  #         "doc_count_error_upper_bound": 0,
+  #         "sum_other_doc_count": 0,
+  #         "buckets": [
+  #           {
+  #             "key": "2018-04-15_12_10.13130//3192_5.168.132.15_Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-A520F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.4 Chrome/51.0.2704.106 Mobile Safari/537.36_regular",
+  #             "doc_count": 1
+  #           },
+  #           {
+  #             "key": "2018-04-15_16_10.13130//3192_151.15.225.227_Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1_regular",
+  #             "doc_count": 1
+  #           }
+  #         ]
+  #       }
+  #     }}
+  #     it "should return an dataset event" do
+  #       response = ResolutionEvent.wrap_event(event)
+  #       expect(response).to eq({})
+  #     end
+  #   end
+  #   context "when event is empty" do
+  #     let(:event) {""}
+  #     it "should fail" do
+  #       response = ResolutionEvent.wrap_event(event)
+  #       expect(response).to eq({})
+  #     end
+  #   end
+  # end
 
 
   describe "" do
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: kishu
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 1.0.0
 platform: ruby
 authors:
 - Kristian Garza
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-12-
+date: 2018-12-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: faraday
@@ -387,14 +387,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.7.
+rubygems_version: 2.7.8
 signing_key:
 specification_version: 4
 summary: Client for DOI Resolution Logs processing pipeline
-test_files:
-- spec/factories/default.rb
-- spec/fixtures/vcr_cassettes/Kishu_Sushi/wrap_event/when_doi_doesn_t_exist/should_fail.yml
-- spec/kishu_spec.rb
-- spec/report_spec.rb
-- spec/resolution_event_spec.rb
-- spec/spec_helper.rb
+test_files: []