ftr_ruby 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ ##
2
+ # Module containing FAIR Test Registry (FTR) related classes.
3
+ module FtrRuby
4
+ ##
5
+ # Represents a single FAIR Test as a DCAT-compliant DataService with additional
6
+ # FAIR-specific metadata.
7
+ #
8
+ # This class generates RDF metadata (in a DCAT + DQV + FTR vocabulary profile)
9
+ # describing a test that can be used to assess FAIR compliance of digital objects
10
+ # (typically datasets). The resulting graph follows the DCAT-AP style for Data Services,
11
+ # extended with FAIR Test Registry (FTR) semantics.
12
+ #
13
+ # == Usage
14
+ #
15
+ # meta = {
16
+ # testid: "ftr-rda-f1-01m",
17
+ # testname: "FAIR Test F1-01M: Persistent Identifier",
18
+ # description: "Checks whether the digital object has a globally unique persistent identifier...",
19
+ # keywords: ["FAIR", "persistent identifier", "F1"],
20
+ # creator: "https://orcid.org/0000-0001-2345-6789",
21
+ # indicators: ["https://w3id.org/ftr/indicator/F1-01M"],
22
+ # metric: "https://w3id.org/ftr/metric/F1-01M",
23
+ # license: "https://creativecommons.org/licenses/by/4.0/",
24
+ # testversion: "1.0",
25
+ # # ... other fields
26
+ # }
27
+ #
28
+ # record = FtrRuby::DCAT_Record.new(meta: meta)
29
+ # graph = record.get_dcat
30
+ #
31
+ class DCAT_Record
32
+ attr_accessor :identifier, :testname, :description, :keywords, :creator,
33
+ :indicators, :end_desc, :end_url, :dctype, :testid, :supportedby,
34
+ :license, :themes, :testversion, :implementations, :isapplicablefor, :applicationarea,
35
+ :organizations, :individuals, :protocol, :host, :basePath, :metric, :landingpage, :definedby
36
+
37
+ require_relative "./output"
38
+ include TripleEasy # get the :"triplify" function
39
+ # triplify(s, p, o, repo, datatype: nil, context: nil, language: 'en')
40
+
41
+ ##
42
+ # Creates a new DCAT_Record from metadata hash.
43
+ #
44
+ # @param meta [Hash] Metadata describing the FAIR test.
45
+ # @option meta [String] :testid Unique identifier for the test (used in URLs)
46
+ # @option meta [String] :testname Human-readable name/title of the test
47
+ # @option meta [String] :description Detailed description of what the test does
48
+ # @option meta [String, Array<String>] :keywords Keywords describing the test
49
+ # @option meta [String] :creator URI or literal identifying the creator
50
+ # @option meta [String, Array<String>] :indicators URIs of the FAIR indicators this test addresses
51
+ # @option meta [String] :metric URI of the metric this test implements
52
+ # @option meta [String] :license License URI for the test
53
+ # @option meta [String, Array<String>] :themes Thematic categories (DCAT themes)
54
+ # @option meta [String] :testversion Version of the test
55
+ # @option meta [Array<Hash>] :individuals List of contact individuals (name, email)
56
+ # @option meta [Array<Hash>] :organizations List of responsible organizations (name, url)
57
+ # @option meta [String] :protocol Protocol (http/https)
58
+ # @option meta [String] :host Hostname of the test service
59
+ # @option meta [String] :basePath Base path of the test service
60
+ #
61
+ # @note Several fields have sensible defaults (e.g. +dctype+, +supportedby+, +applicationarea+).
62
+ # The +end_url+ and +identifier+ are automatically constructed from +protocol+, +host+,
63
+ # +basePath+, and +testid+.
64
+ #
65
def initialize(meta:)
  # Several fields accept either one value or a list of values; store them
  # uniformly as Arrays so that get_dcat can always iterate over them.
  listify = ->(value) { value.is_a?(Array) ? value : [value].compact }

  @testid = meta[:testid]
  @testname = meta[:testname]
  @metric = meta[:metric]
  @creator = meta[:creator]
  @description = meta[:description] || "No description provided"
  @license = meta[:license] || "No License"
  @testversion = meta[:testversion] || "unversioned"
  @dctype = meta[:dctype] || "http://edamontology.org/operation_2428"

  # An Array of indicators used to leave @indicators nil (the wrapping
  # statement only assigned for non-Array input); both forms work now.
  @indicators = listify.call(meta[:indicators])
  @keywords = listify.call(meta[:keywords] || [])
  @themes = listify.call(meta[:themes] || [])
  @supportedby = listify.call(meta[:supportedby] || ["https://tools.ostrails.eu/champion"])
  @applicationarea = listify.call(
    meta[:applicationarea] || ["http://www.fairsharing.org/ontology/subject/SRAO_0000401"]
  )
  @isapplicablefor = listify.call(meta[:isapplicablefor] || ["https://schema.org/Dataset"])
  @organizations = meta[:organizations] || []
  @individuals = meta[:individuals] || []

  @protocol = meta[:protocol]
  @host = meta[:host]
  @basePath = meta[:basePath]

  # to_s keeps a missing value from raising here, so that the validation
  # warning at the end of this method can report the problem instead.
  scheme = @protocol.to_s.delete(":/") # tolerate "https://" as well as "https"
  cleanhost = @host.to_s.delete("/")
  # TODO: this should strip only leading and trailing slashes; internal
  # slashes of a multi-segment base path are currently removed as well.
  cleanpath = @basePath.to_s.delete("/")
  base = "#{scheme}://#{cleanhost}/#{cleanpath}"

  # end_url, end_desc and identifier are always derived from the service
  # location; meta[:end_url] / meta[:end_desc] are not consulted.
  @end_url = "#{base}/assess/test/#{@testid}"
  @end_desc = "#{base}/#{@testid}/api"
  @identifier = "#{base}/#{@testid}"
  @definedby = meta[:definedby] || @identifier
  # Accept the lower-case key (matching the attribute name) and the
  # historical camel-case key.
  @landingpage = meta[:landingpage] || meta[:landingPage] || @identifier

  required = {
    testid: @testid, testname: @testname, creator: @creator,
    protocol: @protocol, host: @host, basePath: @basePath
  }
  missing = required.select { |_name, value| value.nil? || value.to_s.empty? }.keys
  return if missing.empty?

  warn "this record is invalid - it is missing one of testid testname description creator " \
       "end_desc end_url protocol host basePath (missing: #{missing.join(", ")})"
end
103
+
104
+ ##
105
+ # Returns an RDF::Graph containing the DCAT metadata for this test.
106
+ #
107
+ # The graph describes the test as both a +dcat:DataService+ and an +ftr:Test+.
108
+ # It includes:
109
+ #
110
+ # * Core DCAT properties (identifier, title, description, keywords, landing page, etc.)
111
+ # * FAIR-specific extensions via the FTR vocabulary
112
+ # * Contact points (individuals and organizations) using vCard
113
+ # * Link to the metric it implements (SIO)
114
+ # * Supported-by relationships, application areas, and applicability statements
115
+ #
116
+ # @return [RDF::Graph] RDF graph with the complete DCAT record
117
+ #
118
def get_dcat
  schema = RDF::Vocab::SCHEMA
  dcterms = RDF::Vocab::DC
  xsd = RDF::Vocab::XSD
  dcat = RDF::Vocab::DCAT
  sio = RDF::Vocabulary.new("http://semanticscience.org/resource/")
  ftr = RDF::Vocabulary.new("https://w3id.org/ftr#")
  dqv = RDF::Vocabulary.new("http://www.w3.org/ns/dqv#")
  vcard = RDF::Vocabulary.new("http://www.w3.org/2006/vcard/ns#")
  dpv = RDF::Vocabulary.new("https://w3id.org/dpv#")

  # Contact hashes may arrive with String keys or with Symbol keys (the
  # examples in this file use symbols), so look a field up under both.
  field = ->(record, key) { record[key.to_s] || record[key.to_sym] }

  g = RDF::Graph.new
  # The test identifier itself resolves to this metadata, so it is used
  # directly as the subject (there is no separate "/about" resource).
  me = identifier.to_s

  triplify(me, RDF.type, dcat.DataService, g)
  triplify(me, RDF.type, ftr.Test, g)

  # NOTE: per the original author's remark, triplify rejects empty or nil
  # values, which is why most properties below are emitted without guards.

  # Test unique identifier: dcterms:identifier (literal)
  triplify(me, dcterms.identifier, identifier.to_s, g, datatype: xsd.string)

  # Title/name of the test: dcterms:title (literal)
  triplify(me, dcterms.title, testname, g)

  # Description: dcterms:description (literal)
  triplify(me, dcterms.description, description, g)

  # Keywords: dcat:keyword (literal)
  Array(keywords).each { |keyword| triplify(me, dcat.keyword, keyword, g) }

  # Test creator: dcterms:creator
  triplify(me, dcterms.creator, creator, g)

  # FAIR indicators addressed by the test: dqv:inDimension
  # Array() tolerates a nil indicator list.
  Array(indicators).each { |indicator| triplify(me, dqv.inDimension, indicator, g) }

  # API description: dcat:endpointDescription
  triplify(me, dcat.endpointDescription, end_desc, g)

  # API URL: dcat:endpointURL
  triplify(me, dcat.endpointURL, end_url, g)

  # Landing page: dcat:landingPage
  triplify(me, dcat.landingPage, landingpage, g)

  # Pointer to the document defining this record: rdfs:isDefinedBy
  triplify(me, RDF::Vocab::RDFS.isDefinedBy, definedby, g)

  # Functional descriptor/operation: dcterms:type
  triplify(me, dcterms.type, dctype, g)

  # License: dcterms:license
  triplify(me, dcterms.license, license, g)

  # Semantic annotation: dcat:theme
  Array(themes).each { |theme| triplify(me, dcat.theme, theme, g) }

  # Version: dcat:version; the IRI is built from the namespace string, as in
  # the original code, rather than through a vocabulary term lookup.
  triplify(me, "#{dcat}version", testversion, g)

  # Metric implemented by this test: sio:SIO_000233 ("is implementation of")
  triplify(me, sio["SIO_000233"], metric, g)
  triplify(metric, RDF.type, dqv.Metric, g)

  # Responsible individuals: dcat:contactPoint -> vcard:Individual
  (individuals || []).each do |person|
    contact = "urn:fairchampion:testmetadata:individual:#{SecureRandom.uuid}"
    triplify(me, dcat.contactPoint, contact, g)
    triplify(contact, RDF.type, vcard.Individual, g)

    name = field.call(person, :name)
    triplify(contact, vcard.fn, name, g) if name

    email = field.call(person, :email).to_s
    next if email.empty?

    email = "mailto:#{email}" unless email.start_with?("mailto:")
    triplify(contact, vcard.hasEmail, RDF::URI.new(email), g)
  end

  # Responsible organizations: dcat:contactPoint -> vcard:Organization
  (organizations || []).each do |org|
    contact = "urn:fairchampion:testmetadata:org:#{SecureRandom.uuid}"
    triplify(me, dcat.contactPoint, contact, g)
    triplify(contact, RDF.type, vcard.Organization, g)
    triplify(contact, vcard["organization-name"], field.call(org, :name), g)

    url = field.call(org, :url).to_s
    triplify(contact, vcard.url, RDF::URI.new(url), g) unless url.empty?
  end

  # Tools that support this test: ftr:supportedBy
  Array(supportedby).each do |tool|
    triplify(me, ftr.supportedBy, tool, g)
    triplify(tool, RDF.type, schema.SoftwareApplication, g)
  end

  # Application areas: ftr:applicationArea
  Array(applicationarea).each { |domain| triplify(me, ftr.applicationArea, domain, g) }

  # Kinds of digital object the test applies to: dpv:isApplicableFor
  Array(isapplicablefor).each { |target| triplify(me, dpv.isApplicableFor, target, g) }

  g
end
231
+ end
232
+ end
data/lib/fdp_index.rb ADDED
@@ -0,0 +1,168 @@
1
+ require "sparql/client"
2
+ require "sparql"
3
+ require "json/ld"
4
+ require "json/ld/preloaded"
5
+ require "rdf/trig"
6
+ require "rdf/raptor"
7
+ require "fileutils" # For directory creation
8
+ require "digest" # For hashing URLs to filenames
9
+
10
module FtrRuby
  ##
  # Helpers for querying a FAIR Data Point (FDP) index for registered FAIR
  # tests and for resolving human-readable labels of the metrics those tests
  # implement. Fetched metric metadata is cached on disk.
  class FDPIndex
    # Directory holding the on-disk cache of metric repositories. It is
    # resolved against the working directory at the time this file is loaded.
    CACHE_DIR = File.join(Dir.pwd, "cache", "rdf_repositories")

    # Lifetime of a cache entry in seconds: 240 hours (10 days).
    # NOTE(review): the previous comment said "24 hours" while the value has
    # always been 240 * 60 * 60; confirm which one was intended.
    CACHE_EXPIRY = 240 * 60 * 60

    # Query result variables copied verbatim into each test hash (?sub is
    # stored separately under :subj).
    RESULT_FIELDS = %i[
      identifier title description endpoint openapi
      dimension objects domain benchmark_or_metric
    ].freeze

    # Lists every ftr:Test in the index. DCAT_Record publishes applicability
    # with the https://w3id.org/dpv# namespace; the http://www.w3.org/ns/dpv#
    # namespace that this query used previously is still accepted so records
    # matched before keep matching.
    TESTS_QUERY = <<~SPARQL
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX dqv: <http://www.w3.org/ns/dqv#>
      PREFIX dct: <http://purl.org/dc/terms/>
      PREFIX dcat: <http://www.w3.org/ns/dcat#>
      PREFIX sio: <http://semanticscience.org/resource/>
      PREFIX dpv: <https://w3id.org/dpv#>
      PREFIX dpvw3: <http://www.w3.org/ns/dpv#>
      PREFIX ftr: <https://w3id.org/ftr#>
      SELECT distinct ?sub ?identifier ?title ?description ?endpoint ?openapi ?dimension ?objects ?domain ?benchmark_or_metric WHERE {
        ?sub a <https://w3id.org/ftr#Test> ;
             dct:title ?title ;
             dct:description ?description ;
             dct:identifier ?identifier .
        OPTIONAL {?sub dpv:isApplicableFor|dpvw3:isApplicableFor ?objects }
        OPTIONAL {?sub ftr:applicationArea ?domain }
        OPTIONAL {?sub sio:SIO_000233 ?benchmark_or_metric } # implementation of
        OPTIONAL {?sub dcat:endpointDescription ?openapi }
        OPTIONAL {?sub dcat:endpointURL ?endpoint }
        OPTIONAL {?sub dqv:inDimension ?dimension }
      }
    SPARQL

    # Selects candidate labels (dct:title or schema:name) from a metric's RDF.
    METRIC_LABEL_QUERY = <<~SPARQL
      PREFIX dct: <http://purl.org/dc/terms/>
      PREFIX schema: <http://schema.org/>
      SELECT DISTINCT ?label WHERE {
        { ?sub dct:title ?label }
        UNION
        { ?sub schema:name ?label }
      }
    SPARQL

    ##
    # Queries the index for all registered FAIR tests.
    #
    # @param indexendpoint [String] URL of the SPARQL endpoint of the index
    # @return [Array<Hash>] one hash per query solution with the keys :subj,
    #   :identifier, :title, :description, :endpoint, :openapi, :dimension,
    #   :objects, :domain and :benchmark_or_metric; values are Strings, or
    #   nil for unbound optional variables. If the query fails, the error is
    #   reported on stderr and the solutions collected so far are returned.
    def self.retrieve_tests_from_index(indexendpoint: "https://tools.ostrails.eu/repositories/fdpindex-fdp")
      sparql = SPARQL::Client.new(indexendpoint)
      alltests = []

      begin
        sparql.query(TESTS_QUERY).each_solution do |solution|
          test_object = { subj: solution[:sub]&.to_s }
          RESULT_FIELDS.each { |name| test_object[name] = solution[name]&.to_s }
          alltests << test_object
        end
      rescue StandardError => e
        # Reported with warn, like every other diagnostic in this class.
        warn "Error executing SPARQL query: #{e.message}"
      end

      alltests
    end

    ##
    # Resolves a display label for the metric of each given test.
    #
    # @param tests [Array<Hash>] test hashes as returned by
    #   retrieve_tests_from_index; only :benchmark_or_metric is read
    # @return [Hash] metric => label. The label is the first dct:title or
    #   schema:name found in the metric's RDF, "Unnamed Metric" when none is
    #   found, or an "Unable to resolve ..." message when the RDF could not
    #   be retrieved. Tests without a metric are skipped, because the metric
    #   is optional in the index query and may therefore be nil.
    def self.get_metrics_labels_for_tests(tests:)
      labels = {}
      metrics = tests.map { |test| test[:benchmark_or_metric] }
                     .reject { |metric| metric.nil? || metric.to_s.empty? }
                     .uniq # each metric is resolved once per call
      return labels if metrics.empty?

      FileUtils.mkdir_p(CACHE_DIR)

      metrics.each do |metric|
        warn "Processing metric: #{metric}"
        repository = repository_for(metric)
        labels[metric] = if repository
                           label_from(repository)
                         else
                           "Unable to resolve #{metric} to RDF metadata"
                         end
      end

      labels
    end

    ##
    # Loads a cached object from disk if the entry exists and is younger than
    # CACHE_EXPIRY.
    #
    # SECURITY NOTE: Marshal.load can instantiate arbitrary objects. This is
    # only acceptable while the cache directory is writable solely by trusted
    # users; never point it at files from an untrusted source.
    #
    # @param cache_file [String] path of the cache entry
    # @return [Object, nil] the cached object; nil when the entry is missing,
    #   expired or unreadable
    def self.load_from_cache(cache_file)
      return nil unless File.exist?(cache_file)

      File.open(cache_file, "rb") do |file|
        # The file holds two consecutive Marshal records: timestamp, payload.
        timestamp = Marshal.load(file)
        return Marshal.load(file) if Time.now - timestamp < CACHE_EXPIRY
      end
      nil # cache entry expired
    rescue StandardError => e
      warn "Error loading cache from #{cache_file}: #{e.message}"
      nil
    end

    ##
    # Writes an object to the disk cache, preceded by the current time.
    # Failures are reported on stderr and otherwise ignored (best effort).
    #
    # @param cache_file [String] path of the cache entry to write
    # @param repository [Object] the object to cache (an RDF::Repository in
    #   this class's own use)
    def self.save_to_cache(cache_file, repository)
      File.open(cache_file, "wb") do |file|
        Marshal.dump(Time.now, file)
        Marshal.dump(repository, file)
      end
    rescue StandardError => e
      warn "Error saving cache to #{cache_file}: #{e.message}"
    end

    # Returns the RDF repository for a metric, from the disk cache when
    # possible and otherwise by dereferencing the metric URL; nil on failure.
    def self.repository_for(metric)
      cache_file = File.join(CACHE_DIR, "#{Digest::SHA256.hexdigest(metric.to_s)}.bin")

      cached = load_from_cache(cache_file)
      if cached
        warn "Loaded #{metric} from cache"
        return cached
      end

      warn "Fetching RDF for #{metric}"
      repository = RDF::Repository.new
      RDF::Reader.open(metric, headers: { "Accept" => "application/ld+json" }) do |reader|
        repository << reader
      end
      save_to_cache(cache_file, repository)
      warn "Cached #{metric} to disk"
      repository
    rescue StandardError => e
      warn "Error fetching RDF for #{metric}: #{e.message}"
      nil
    end
    private_class_method :repository_for

    # Returns the first non-empty label found in the repository, or
    # "Unnamed Metric" when there is none.
    def self.label_from(repository)
      results = SPARQL.parse(METRIC_LABEL_QUERY).execute(repository)
      label = results&.first&.[](:label).to_s
      label.empty? ? "Unnamed Metric" : label
    end
    private_class_method :label_from
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module FtrRuby
  # Release number of the ftr_ruby gem.
  VERSION = "0.1.4"
end
data/lib/ftr_ruby.rb ADDED
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ftr_ruby/version"
4
+ require "rest-client"
5
+ require "json"
6
+ require "sparql"
7
+ require "sparql/client"
8
+ require "linkeddata"
9
+ require "safe_yaml"
10
+ require "rdf/nquads"
11
+ require "cgi"
12
+ require "securerandom"
13
+ require "rdf/vocab"
14
+ require "triple_easy" # provides "triplify" top-level function
15
+
16
+ # lib/ftr_ruby.rb
17
+
18
+ require "dcat_metadata"
19
+ require "output"
20
+ require "fdp_index"
21
+ require "openapi"
22
+ require "registertest"
23
+ require "test_infrastructure"
24
+
25
module FtrRuby
  # Error class for this gem, derived from StandardError so that a plain
  # `rescue` catches it.
  Error = Class.new(StandardError)
end
data/lib/openapi.rb ADDED
@@ -0,0 +1,79 @@
1
module FtrRuby
  ##
  # Builds the OpenAPI 3.0 (YAML) description of a single FAIR test endpoint
  # from the same metadata hash that describes the test.
  class OpenAPI
    attr_accessor :title, :metric, :description, :indicator, :testid,
                  :organization, :org_url, :version, :creator,
                  :responsible_developer, :email, :developer_ORCiD, :protocol,
                  :host, :basePath, :path, :response_description, :schemas, :endpointpath

    ##
    # @param meta [Hash] test metadata. Keys read: :testid, :testname,
    #   :testversion, :metric, :description, :indicators (a single value or
    #   an Array; only the first entry is used), :organization, :org_url,
    #   :responsible_developer, :email, :creator, :host, :protocol,
    #   :basePath, :response_description and :schemas.
    def initialize(meta:)
      @testid = meta[:testid]
      @title = meta[:testname]
      @version = meta[:testversion]
      @metric = meta[:metric]
      @description = meta[:description]
      # Array() accepts nil, one indicator or a list; previously an Array
      # argument left the local unset and `.first` raised NoMethodError.
      @indicator = Array(meta[:indicators]).first
      @organization = meta[:organization]
      @org_url = meta[:org_url]
      @responsible_developer = meta[:responsible_developer]
      @email = meta[:email]
      @creator = meta[:creator]
      # to_s keeps missing location fields from raising in the constructor.
      @host = meta[:host].to_s.chomp("/") # drop one trailing slash if present
      @protocol = meta[:protocol].to_s.delete(":/") # "https://" -> "https"
      # Every ":" and "/" is removed from the base path (including internal
      # slashes, matching how DCAT_Record builds its URLs), then exactly one
      # leading slash is added.
      @basePath = "/#{meta[:basePath].to_s.delete(":/")}"
      @response_description = meta[:response_description]
      @schemas = meta[:schemas]
      @endpointpath = "assess/test"
    end

    ##
    # Renders the OpenAPI document.
    #
    # The `properties` of the request schema are emitted as a mapping
    # (property name => schema), as OpenAPI requires; they were previously
    # emitted as a YAML list.
    #
    # @return [String] YAML text of the OpenAPI 3.0 description, starting and
    #   ending with a blank line
    def get_api
      <<~YAML

        openapi: 3.0.0
        info:
          version: "#{version}"
          title: "#{title}"
          x-tests_metric: "#{metric}"
          description: >-
            "#{description}"
          x-applies_to_principle: "#{indicator}"
          contact:
            x-organization: "#{organization}"
            url: "#{org_url}"
            name: "#{responsible_developer}"
            x-role: responsible developer
            email: "#{email}"
            x-id: "#{creator}"
        paths:
          "/#{testid}":
            post:
              requestBody:
                content:
                  application/json:
                    schema:
                      $ref: "#/components/schemas/schemas"
                required: true
              responses:
                "200":
                  description: >-
                    #{response_description}
        servers:
          - url: "#{protocol}://#{host}#{basePath}/#{endpointpath}"
        components:
          schemas:
            schemas:
              required:
                - resource_identifier
              properties:
                resource_identifier:
                  type: string
                  description: the GUID being tested

      YAML
    end
  end
end