kishu 0.0.0

@@ -0,0 +1,83 @@
+ require 'faraday'
+ require 'logger'
+ require 'maremma'
+ require 'sucker_punch'
+
+ require_relative 'utils'
+ require_relative 'base'
+ require_relative 'lagotto_job'
+
+ module Kishu
+   class ResolutionEvent
+
+     include Kishu::Utils
+
+     def initialize(event, options = {})
+       @event = event
+       @logger = Logger.new(STDOUT)
+       @period = options[:period]
+     end
+
+     # Wrap a single Elasticsearch aggregation bucket into a COUNTER-style
+     # dataset hash with one instance per metric-type / access-method pair.
+     def wrap_event
+       totale = @event.dig("totale", "buckets") || []
+       unique = @event.dig("unique", "buckets") || []
+
+       unique_regular = unique.find_all { |access_method| access_method.fetch('key', "").match('regular') }
+       unique_machine = unique.find_all { |access_method| access_method.fetch('key', "").match('machine') }
+       total_regular  = totale.find_all { |access_method| access_method.fetch('key', "").match('regular') }
+       total_machine  = totale.find_all { |access_method| access_method.fetch('key', "").match('machine') }
+
+       dataset = {
+         doi: @event.dig("key", "doi"),
+         unique_counts_regular: unique_regular.size,
+         unique_counts_machine: unique_machine.size,
+         total_counts_regular: total_regular.empty? ? 0 : total_regular.dig(0, "doc_count"),
+         total_counts_machine: total_machine.empty? ? 0 : total_machine.dig(0, "doc_count")
+       }
+
+       @doi = dataset.fetch(:doi, nil)
+
+       instances = [
+         {
+           "count" => dataset.fetch(:total_counts_regular),
+           "access-method" => "regular",
+           "metric-type" => "total-resolutions"
+         },
+         {
+           "count" => dataset.fetch(:unique_counts_regular),
+           "access-method" => "regular",
+           "metric-type" => "unique-resolutions"
+         },
+         {
+           "count" => dataset.fetch(:unique_counts_machine),
+           "access-method" => "machine",
+           "metric-type" => "unique-resolutions"
+         },
+         {
+           "count" => dataset.fetch(:total_counts_machine),
+           "access-method" => "machine",
+           "metric-type" => "total-resolutions"
+         }
+       ]
+
+       # Drop metrics with no activity in the period.
+       instances.delete_if { |instance| instance.fetch("count", 0) < 1 }
+
+       {
+         "dataset-id" => [{ "type" => "doi", "value" => dataset.fetch(:doi, nil) }],
+         "performance" => [{
+           "period" => @period,
+           "instance" => instances
+         }]
+       }
+     end
+   end
+ end
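A minimal usage sketch for ResolutionEvent#wrap_event (illustrative, not from the gem): it assumes the gem and its dependencies are loaded and that the aggregation bucket is shaped the way the method reads it; the event and period values below are made up.

    event = {
      "key"    => { "doi" => "10.5065/D6V1236Q" },
      "totale" => { "buckets" => [{ "key" => "regular", "doc_count" => 473 },
                                  { "key" => "machine", "doc_count" => 5093 }] },
      "unique" => { "buckets" => [{ "key" => "regular", "doc_count" => 12 },
                                  { "key" => "machine", "doc_count" => 7 }] }
    }
    period = { "begin-date" => "2018-04-01", "end-date" => "2018-04-30" }
    report_row = Kishu::ResolutionEvent.new(event, period: period).wrap_event
    puts report_row.inspect
    # => {"dataset-id"=>[{"type"=>"doi", "value"=>"10.5065/D6V1236Q"}],
    #     "performance"=>[{"period"=>{...}, "instance"=>[...]}]}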
@@ -0,0 +1,24 @@
+
+ require 'aws-sdk-s3'
+
+ require_relative 'utils'
+ require_relative 'base'
+
+ module Kishu
+   class S3
+
+     def initialize
+       @s3 = Aws::S3::Client.new
+       @buckets = @s3.list_buckets.buckets.map(&:name)
+     end
+
+     def download_logs
+       resp = @s3.get_object(
+         response_target: '/logs',
+         bucket: S3_RESOLUTION_LOGS_BUCKET,
+         key: 'object-key')
+       resp.metadata
+     end
+   end
+ end
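A hypothetical way to exercise Kishu::S3, assuming AWS credentials and region come from the standard environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION) and that S3_RESOLUTION_LOGS_BUCKET is defined elsewhere in the gem (e.g. in Kishu::Base):

    s3 = Kishu::S3.new   # lists the visible buckets on initialization
    s3.download_logs     # writes the configured object to /logs and returns its metadata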
@@ -0,0 +1,59 @@
+
+ require 'thor'
+
+ require_relative 'resolution_event'
+ require_relative 'report'
+ require_relative 'utils'
+ require_relative 'base'
+
+ module Kishu
+   class Sushi < Thor
+
+     include Kishu::Base
+     include Kishu::Utils
+
+     desc "get sushi", "get resolution report"
+     # method_option :username, :default => ENV['MDS_USERNAME']
+     method_option :aggs_size, :type => :numeric, :default => 1000
+     method_option :month_year, :type => :string, :default => "2018-04"
+     def get
+       report = Report.new
+       report.make_report(options)
+     end
+
+     desc "continue_report sushi", "resume generating report files"
+     method_option :month_year, :type => :string, :default => "2018-04"
+     method_option :after_key, :type => :string
+     def continue_report
+       report = Report.new
+       report.generate_files(options)
+     end
+
+     desc "clean_all sushi", "clean index"
+     method_option :month_year, :type => :string, :default => "2018-04"
+     method_option :after_key, :type => :string
+     def clean_all
+       client = Client.new
+       client.clear_index
+     end
+
+     desc "send_report_events sushi", "send_report_events index"
+     method_option :month_year, :type => :string, :default => "2018-04"
+     method_option :after_key, :type => :string
+     method_option :chunk_size, :type => :numeric, :default => 40000
+     method_option :aggs_size, :type => :numeric, :default => 500
+     def send_report_events
+       fail "You need to set your JWT" if HUB_TOKEN.blank?
+       report = Report.new(options)
+       report.make_report(options)
+     end
+
+   end
+ end
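A sketch of driving the Thor CLI from Ruby (illustrative; it assumes the Report and Client classes and the Elasticsearch / Event Data environment they rely on are configured, and that the gem exposes a top-level require):

    require 'kishu'   # hypothetical top-level require
    Kishu::Sushi.start(%w[get --month-year 2018-04 --aggs-size 1000])
    Kishu::Sushi.start(%w[clean_all])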
@@ -0,0 +1,124 @@
+ require 'faraday'
+ require 'logger'
+ require 'maremma'
+
+ require_relative 'utils'
+ require_relative 'base'
+
+ module Kishu
+   class UsageEvent
+
+     API_URL = "https://api.datacite.org"
+
+     # Wrap a single Elasticsearch aggregation bucket into a COUNTER-style
+     # dataset hash. DataCite metadata enrichment is currently stubbed out.
+     def wrap_event(event)
+       logger = Logger.new(STDOUT)
+
+       totale = event.dig("totale", "buckets") || []
+       unique = event.dig("unique", "buckets") || []
+
+       unique_regular = unique.find_all { |access_method| access_method.fetch('key', "").match('regular') }
+       unique_machine = unique.find_all { |access_method| access_method.fetch('key', "").match('machine') }
+       total_regular  = totale.find_all { |access_method| access_method.fetch('key', "").match('regular') }
+       total_machine  = totale.find_all { |access_method| access_method.fetch('key', "").match('machine') }
+
+       dataset = {
+         doi: event.dig("key", "doi"),
+         unique_counts_regular: unique_regular.size,
+         unique_counts_machine: unique_machine.size,
+         total_counts_regular: total_regular.empty? ? 0 : total_regular.dig(0, "doc_count"),
+         total_counts_machine: total_machine.empty? ? 0 : total_machine.dig(0, "doc_count")
+       }
+
+       logger.info event.fetch("doc_count")
+       logger.info dataset
+       doi = dataset.fetch(:doi, nil)
+
+       # Metadata lookup is disabled for now; when enabled it would fetch
+       #   json = Maremma.get "#{API_URL}/works/#{doi}"
+       # and read data.dig("data", "attributes"). Until then the fields below stay empty.
+       attributes = {} # data.dig("data", "attributes")
+       resource_type = "" # attributes.fetch("resource-type-id", nil) || "dataset"
+
+       instances = [
+         {
+           count: dataset.fetch(:total_counts_regular),
+           "access-method": "regular",
+           "metric-type": "total-resolutions"
+         },
+         {
+           count: dataset.fetch(:unique_counts_regular),
+           "access-method": "regular",
+           "metric-type": "unique-resolutions"
+         },
+         {
+           count: dataset.fetch(:unique_counts_machine),
+           "access-method": "machine",
+           "metric-type": "unique-resolutions"
+         },
+         {
+           count: dataset.fetch(:total_counts_machine),
+           "access-method": "machine",
+           "metric-type": "total-resolutions"
+         }
+       ]
+
+       instances.delete_if { |instance| instance.fetch(:count, 0) <= 0 }
+
+       instanced = {
+         "dataset-id" => [{ type: "doi", value: doi }],
+         "data-type" => resource_type,
+         yop: attributes.fetch("published", nil),
+         uri: attributes.fetch("identifier", nil),
+         publisher: attributes.fetch("container-title", nil),
+         "dataset-title": attributes.fetch("title", nil),
+         "publisher-id": [{
+           type: "client-id",
+           value: attributes.fetch("data-center-id", nil)
+         }],
+         "dataset-dates": [{
+           type: "pub-date",
+           value: attributes.fetch("published", nil)
+         }],
+         "dataset-contributors": attributes.fetch("author", []).map { |a| get_authors(a) },
+         platform: "datacite",
+         performance: [{
+           period: @period,
+           instance: instances
+         }]
+       }
+       logger.info instanced
+
+       instanced
+     end
+
+     def get_authors(author)
+       if author.key?("given") && author.key?("family")
+         { type: "name",
+           value: author.fetch("given", nil) + " " + author.fetch("family", nil) }
+       elsif author.key?("literal")
+         { type: "name",
+           value: author.fetch("literal", nil) }
+       else
+         { type: "name",
+           value: "" }
+       end
+     end
+
+   end
+ end
@@ -0,0 +1,115 @@
+ require "bolognese"
+ require "time"
+
+ module Kishu
+   module Utils
+     include ::Bolognese::MetadataUtils
+
+     def clean_tmp
+       system("rm tmp/datasets-*.json")
+       puts "/tmp Files deleted"
+     end
+
+     def merged_file
+       "reports/datacite_resolution_report_#{report_period.strftime("%Y-%m")}_2.json"
+     end
+
+     def encoded_file
+       "reports/datacite_resolution_report_#{report_period.strftime("%Y-%m")}_encoded.json"
+     end
+
+     # Write the JSON fragments that open and close the merged report file.
+     def generate_header_footer
+       report_header = '{"report-header": ' + get_header.to_json.to_s + ',"report-datasets": [ ' + "\n"
+
+       File.open("tmp/datasets-00-report-header.json", "w") do |f|
+         f.write(report_header)
+       end
+
+       report_footer = ']' + "\n" + '}'
+
+       File.open("tmp/datasets-zz99-report-footer.json", "w") do |f|
+         f.write(report_footer)
+       end
+     end
+
+     def get_authors(author)
+       if author.key?("given") && author.key?("family")
+         { type: "name",
+           value: author.fetch("given", nil) + " " + author.fetch("family", nil) }
+       elsif author.key?("literal")
+         { type: "name",
+           value: author.fetch("literal", nil) }
+       else
+         { type: "name",
+           value: "" }
+       end
+     end
+
+     # Build an Event Data "events" payload for one usage instance.
+     def format_instance(data, options = {})
+       obj = get_metadata(options[:dataset_id])
+       subj = { id: options[:report_id] }
+       relation_type = "#{data[:"metric-type"]}-#{data[:"access-method"]}"
+       source_id = "datacite-resolution"
+       source_token = SOURCE_TOKEN
+       {
+         "data" => {
+           "type" => "events",
+           "attributes" => {
+             "message-action" => "create",
+             "subj-id" => options[:report_id],
+             "total" => data[:count],
+             "obj-id" => options[:dataset_id],
+             "relation-type-id" => relation_type.to_s.dasherize,
+             "source-id" => source_id,
+             "source-token" => source_token,
+             "occurred-at" => Time.now.iso8601, # need modify
+             "timestamp" => Time.now.iso8601,
+             "license" => LICENSE,
+             "subj" => subj,
+             "obj" => obj } } }
+     end
+
+     # Fetch DOI metadata from the DataCite REST API and map it to the
+     # schema.org-flavoured hash used in event payloads.
+     def get_metadata(id)
+       doi = doi_from_url(id)
+       return {} unless doi.present?
+
+       url = API_URL + "/dois/#{doi}"
+       response = Maremma.get(url)
+       return {} if response.status != 200
+
+       attributes = response.body.dig("data", "attributes")
+       relationships = response.body.dig("data", "relationships")
+
+       resource_type = response.body.dig("data", "relationships")
+       resource_type_general = relationships.dig("resource-type", "data", "id")
+       type = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.underscore.camelcase(:upper)] || "CreativeWork"
+       author = Array.wrap(attributes["author"]).map do |a|
+         {
+           "given_name" => a["givenName"],
+           "family_name" => a["familyName"],
+           "name" => a["familyName"].present? ? nil : a["name"] }.compact
+       end
+       client_id = relationships.dig("client", "data", "id")
+
+       {
+         "id" => id,
+         "type" => type.underscore.dasherize,
+         "name" => attributes["title"],
+         "author" => author,
+         "publisher" => attributes["publisher"],
+         "version" => attributes["version"],
+         "date_published" => attributes["published"],
+         "date_modified" => attributes["updated"],
+         "registrant_id" => "datacite.#{client_id}" }.compact
+     end
+
+     def encoded
+       Base64.strict_encode64(compress_merged_file)
+     end
+
+     def checksum
+       Digest::SHA256.hexdigest(compress_merged_file)
+     end
+
+   end
+ end
@@ -0,0 +1,3 @@
+ module Kishu
+   VERSION = "0.0.0"
+ end
@@ -0,0 +1,71 @@
+
+ FactoryBot.define do
+   factory :resolution_event do
+     period { { "begin_date" => "2018-03-01", "end_date" => "2018-03-31" } }
+     event do
+       {
+         "key" => "10.5065/D6V1236Q",
+         "doc_count" => 5566,
+         "total" => {
+           "doc_count_error_upper_bound" => 0,
+           "sum_other_doc_count" => 0,
+           "buckets" => [
+             {
+               "key" => "machine",
+               "doc_count" => 5093
+             },
+             {
+               "key" => "regular",
+               "doc_count" => 473
+             }
+           ]
+         },
+         "access_method" => {
+           "doc_count_error_upper_bound" => 0,
+           "sum_other_doc_count" => 0,
+           "buckets" => [
+             {
+               "key" => "machine",
+               "doc_count" => 5093,
+               "session" => {
+                 "doc_count_error_upper_bound" => 10,
+                 "sum_other_doc_count" => 5072,
+                 "buckets" => [
+                   {
+                     "key" => "2018-09-18_16_10.5065/D6V1236Q_54.71.12.185_curl/7.38.0",
+                     "doc_count" => 5
+                   },
+                   {
+                     "key" => "2018-09-01_05_10.5065/D6V1236Q_45.79.139.170_curl/7.38.0",
+                     "doc_count" => 4
+                   },
+                   {
+                     "key" => "2018-09-03_16_10.5065/D6V1236Q_52.40.104.81_curl/7.38.0",
+                     "doc_count" => 4
+                   },
+                   {
+                     "key" => "2018-09-12_00_10.5065/D6V1236Q_52.39.7.168_curl/7.38.0",
+                     "doc_count" => 4
+                   },
+                   {
+                     "key" => "2018-09-26_06_10.5065/D6V1236Q_52.39.7.168_curl/7.38.0",
+                     "doc_count" => 4
+                   }
+                 ]
+               },
+               "unique" => {
+                 "value" => 3084
+               }
+             }
+           ]
+         }
+       }
+     end
+   end
+
+   factory :usage_event do
+   end
+
+   factory :report do
+   end
+ end