tao_rdfizer 0.10 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/tao_rdfizer +10 -1
 - data/lib/tao_rdfizer/tao_rdfizer.rb +54 -12
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: ef68cf0cfd37026eda3abfb19f294d866c343294c1a9cc8f4ed2aa9af1f1443c
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: fdde442909c60d0dbfe81ea445b049a52829752d01b06f5f599786071b09b62c
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: a0219d82d900259fd8e7dab5ea5382bc06bb504ea21207302b21b59047f33ba899491aa53043a9456af1c9e1693d9a957223a9cad31d8a2e21f4df60aebdbf4e
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 5250d9a3daf7716e5c8f2e01fe2560589862fb3b7cdf7a03c067f07e47930f91b6912b7b9316d8de652fcf983f677e1ab0d9de16954f2fdf3880478e7f50344a
         
     | 
    
        data/bin/tao_rdfizer
    CHANGED
    
    | 
         @@ -3,6 +3,7 @@ require 'tao_rdfizer' 
     | 
|
| 
       3 
3 
     | 
    
         
             
            require 'json'
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            mode = nil
         
     | 
| 
      
 6 
     | 
    
         
            +
            options = {}
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
       7 
8 
     | 
    
         
             
            ## command line option processing
         
     | 
| 
       8 
9 
     | 
    
         
             
            require 'optparse'
         
     | 
| 
         @@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts| 
     | 
|
| 
       17 
18 
     | 
    
         
             
                mode = :spans
         
     | 
| 
       18 
19 
     | 
    
         
             
              end
         
     | 
| 
       19 
20 
     | 
    
         | 
| 
      
 21 
     | 
    
         
            +
              opts.on('-x', '--x-prefixes', 'without prefixes.') do
         
     | 
| 
      
 22 
     | 
    
         
            +
                options[:with_prefixes] = false
         
     | 
| 
      
 23 
     | 
    
         
            +
              end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
              opts.on('-o', '--only-prefixes', 'only prefixes.') do
         
     | 
| 
      
 26 
     | 
    
         
            +
                options[:only_prefixes] = true
         
     | 
| 
      
 27 
     | 
    
         
            +
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
       20 
29 
     | 
    
         
             
              opts.on('-h', '--help', 'displays this screen.') do
         
     | 
| 
       21 
30 
     | 
    
         
             
                puts opts
         
     | 
| 
       22 
31 
     | 
    
         
             
                exit
         
     | 
| 
         @@ -34,7 +43,7 @@ begin 
     | 
|
| 
       34 
43 
     | 
    
         
             
            	annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
         
     | 
| 
       35 
44 
     | 
    
         
             
            	annotations = [annotations] unless annotations.class == Array
         
     | 
| 
       36 
45 
     | 
    
         
             
            	rdfizer = TAO::RDFizer.new(mode)
         
     | 
| 
       37 
     | 
    
         
            -
            	puts rdfizer.rdfize(annotations)
         
     | 
| 
      
 46 
     | 
    
         
            +
            	puts rdfizer.rdfize(annotations, options)
         
     | 
| 
       38 
47 
     | 
    
         
             
            rescue ArgumentError, IOError => e
         
     | 
| 
       39 
48 
     | 
    
         
             
            	puts e.message
         
     | 
| 
       40 
49 
     | 
    
         
             
            end
         
     | 
| 
         @@ -14,18 +14,45 @@ class TAO::RDFizer 
     | 
|
| 
       14 
14 
     | 
    
         
             
            		else
         
     | 
| 
       15 
15 
     | 
    
         
             
            			ERB_ANNOTATIONS_TTL
         
     | 
| 
       16 
16 
     | 
    
         
             
            		end
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
17 
     | 
    
         
             
            		@tao_ttl_erb = ERB.new(template, nil, '-')
         
     | 
| 
       19 
18 
     | 
    
         
             
            		@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
         
     | 
| 
       20 
19 
     | 
    
         
             
            	end
         
     | 
| 
       21 
20 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
            	def rdfize(annotations_col)
         
     | 
| 
      
 21 
     | 
    
         
            +
            	# Renders a collection of annotation hashes as a Turtle (TTL) string.
            	#
            	# annotations_col - Array of annotation hashes. Every element must have a
            	#                   :target key. Namespaces are taken from the :namespaces
            	#                   entry of the first element only (a list of hashes with
            	#                   :prefix and :uri).
            	# options         - Hash or nil:
            	#                   :only_prefixes - when exactly true, return only the
            	#                                    prefix declarations (default: false).
            	#                   :with_prefixes - when given and not exactly true, the
            	#                                    prefix declarations are left out
            	#                                    (default: true).
            	#
            	# Returns the rendered String.
            	# Raises RuntimeError ("'target' is missing") if an element lacks :target.
            	def rdfize(annotations_col, options = nil)
            		options ||= {}
            		# Only a literal `true` switches a flag on; any other value counts as false.
            		only_prefixes = options.fetch(:only_prefixes, false) == true
            		with_prefixes = options.fetch(:with_prefixes, true) == true

            		# check the format
            		annotations_col.each do |annotations|
            			raise "'target' is missing" unless annotations.key?(:target)
            		end

            		# namespaces
            		# An empty collection has no first element; treat it as declaring no
            		# namespaces instead of failing on nil.
            		declared = (annotations_col.first || {})[:namespaces] || []
            		namespaces = declared.each_with_object({}) { |n, h| h[n[:prefix]] = n[:uri] }

            		# The prefix block is rendered before the annotations, and only when needed.
            		prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces: namespaces) if only_prefixes || with_prefixes
            		return prefixes_ttl if only_prefixes

            		annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
            		with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
            	end
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            	private
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            	def get_annotations_ttl(annotations_col, namespaces)
         
     | 
| 
      
 54 
     | 
    
         
            +
            		anns = annotations_col.first
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
       29 
56 
     | 
    
         
             
            		unless @mode ==:spans
         
     | 
| 
       30 
57 
     | 
    
         
             
            			raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
         
     | 
| 
       31 
58 
     | 
    
         
             
            			prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
         
     | 
| 
         @@ -53,14 +80,14 @@ class TAO::RDFizer 
     | 
|
| 
       53 
80 
     | 
    
         
             
            			_relations = annotations[:relations] || []
         
     | 
| 
       54 
81 
     | 
    
         
             
            			if @mode == :spans && annotations.has_key?(:tracks)
         
     | 
| 
       55 
82 
     | 
    
         
             
            				annotations[:tracks].each do |track|
         
     | 
| 
       56 
     | 
    
         
            -
            					_denotations += track[:denotations]
         
     | 
| 
       57 
     | 
    
         
            -
            					_attributes += track[:attributes]
         
     | 
| 
       58 
     | 
    
         
            -
            					_relations += track[:relations]
         
     | 
| 
      
 83 
     | 
    
         
            +
            					_denotations += track[:denotations] if track.has_key? :denotations
         
     | 
| 
      
 84 
     | 
    
         
            +
            					_attributes += track[:attributes] if track.has_key? :attributes
         
     | 
| 
      
 85 
     | 
    
         
            +
            					_relations += track[:relations] if track.has_key? :relations
         
     | 
| 
       59 
86 
     | 
    
         
             
            				end
         
     | 
| 
       60 
87 
     | 
    
         
             
            			end
         
     | 
| 
       61 
88 
     | 
    
         | 
| 
       62 
89 
     | 
    
         
             
            			begin
         
     | 
| 
       63 
     | 
    
         
            -
            				 
     | 
| 
      
 90 
     | 
    
         
            +
            				unless @mode == :span
         
     | 
| 
       64 
91 
     | 
    
         
             
            					# index attributes
         
     | 
| 
       65 
92 
     | 
    
         
             
            					attributesh = _attributes.inject({}) do |h, a|
         
     | 
| 
       66 
93 
     | 
    
         
             
            						if a[:pred].end_with?('_id')
         
     | 
| 
         @@ -76,7 +103,7 @@ class TAO::RDFizer 
     | 
|
| 
       76 
103 
     | 
    
         
             
            						span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
         
     | 
| 
       77 
104 
     | 
    
         
             
            						d[:span_uri] = span_uri
         
     | 
| 
       78 
105 
     | 
    
         
             
            						d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
         
     | 
| 
       79 
     | 
    
         
            -
            						class_uris = attributesh[d[:id]].push(d[:obj])
         
     | 
| 
      
 106 
     | 
    
         
            +
            						class_uris = (attributesh[d[:id]] || []).push(d[:obj])
         
     | 
| 
       80 
107 
     | 
    
         
             
            						d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
         
     | 
| 
       81 
108 
     | 
    
         
             
            					rescue ArgumentError => e
         
     | 
| 
       82 
109 
     | 
    
         
             
            						raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
         
     | 
| 
         @@ -167,11 +194,9 @@ class TAO::RDFizer 
     | 
|
| 
       167 
194 
     | 
    
         
             
            			spans += _spans unless @mode == :annotations
         
     | 
| 
       168 
195 
     | 
    
         
             
            		end
         
     | 
| 
       169 
196 
     | 
    
         | 
| 
       170 
     | 
    
         
            -
            		 
     | 
| 
      
 197 
     | 
    
         
            +
            		@tao_ttl_erb.result(binding)
         
     | 
| 
       171 
198 
     | 
    
         
             
            	end
         
     | 
| 
       172 
199 
     | 
    
         | 
| 
       173 
     | 
    
         
            -
            	private
         
     | 
| 
       174 
     | 
    
         
            -
             
     | 
| 
       175 
200 
     | 
    
         
             
            	def include_parent?(spans, span)
         
     | 
| 
       176 
201 
     | 
    
         
             
            		# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
         
     | 
| 
       177 
202 
     | 
    
         
             
            		spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
         
     | 
| 
         @@ -187,7 +212,10 @@ class TAO::RDFizer 
     | 
|
| 
       187 
212 
     | 
    
         
             
            	end
         
     | 
| 
       188 
213 
     | 
    
         | 
| 
       189 
214 
     | 
    
         
             
            	def find_uri (label, namespaces, prefix_for_this)
         
     | 
| 
       190 
     | 
    
         
            -
            		 
     | 
| 
      
 215 
     | 
    
         
            +
            		if label.match(/\s/)
         
     | 
| 
      
 216 
     | 
    
         
            +
            			# raise ArgumentError, "A label including a whitespace character found: #{label}."
         
     | 
| 
      
 217 
     | 
    
         
            +
            			label.gsub(/\s/, '_')
         
     | 
| 
      
 218 
     | 
    
         
            +
            		end
         
     | 
| 
       191 
219 
     | 
    
         
             
            		delimiter_position = label.index(':')
         
     | 
| 
       192 
220 
     | 
    
         
             
            		if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
         
     | 
| 
       193 
221 
     | 
    
         
             
            			label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
         
     | 
| 
         @@ -203,6 +231,18 @@ class TAO::RDFizer 
     | 
|
| 
       203 
231 
     | 
    
         
             
            		end
         
     | 
| 
       204 
232 
     | 
    
         
             
            	end
         
     | 
| 
       205 
233 
     | 
    
         | 
| 
      
 234 
     | 
    
         
            +
            	# Escapes a string so it can be placed inside a double-quoted Turtle literal.
            	#
            	# Backslash, tab, backspace, newline, carriage return, form feed and the
            	# double quote are each replaced by their two-character backslash escape.
            	# All other characters are passed through unchanged.
            	#
            	# string - the String to escape.
            	#
            	# Returns a new, frozen String.
            	def rdf_literal_escape(string)
            		# A Hash replacement is used on purpose. With a String replacement, gsub
            		# interprets '\\\\' (two backslashes) as one escaped backslash, so
            		# gsub('\\', '\\\\') leaves backslashes untouched. Hash values are inserted
            		# verbatim, and a single pass cannot re-escape its own output.
            		escapes = {
            			'\\' => '\\\\',
            			"\t" => '\\t',
            			"\b" => '\\b',
            			"\n" => '\\n',
            			"\r" => '\\r',
            			"\f" => '\\f',
            			'"'  => '\\"'
            		}
            		string.gsub(/[\\\t\x08\n\r\f"]/, escapes).freeze
            	end
         
     | 
| 
      
 244 
     | 
    
         
            +
             
     | 
| 
      
 245 
     | 
    
         
            +
            	# variable: denotations, relations
         
     | 
| 
       206 
246 
     | 
    
         
             
            	ERB_ANNOTATIONS_TTL = <<~HEREDOC
         
     | 
| 
       207 
247 
     | 
    
         
             
            		<% denotations.each do |d| -%>
         
     | 
| 
       208 
248 
     | 
    
         
             
            		<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
         
     | 
| 
         @@ -214,6 +254,7 @@ class TAO::RDFizer 
     | 
|
| 
       214 
254 
     | 
    
         
             
            		<% end -%>
         
     | 
| 
       215 
255 
     | 
    
         
             
            	HEREDOC
         
     | 
| 
       216 
256 
     | 
    
         | 
| 
      
 257 
     | 
    
         
            +
            	# variable: spans
         
     | 
| 
       217 
258 
     | 
    
         
             
            	ERB_SPANS_TTL = <<~HEREDOC
         
     | 
| 
       218 
259 
     | 
    
         
             
            		<% spans.each do |s| -%>
         
     | 
| 
       219 
260 
     | 
    
         
             
            		<%= s[:span_uri] %> rdf:type tao:Text_span ;
         
     | 
| 
         @@ -223,13 +264,14 @@ class TAO::RDFizer 
     | 
|
| 
       223 
264 
     | 
    
         
             
            		<% s[:children].each do |s| -%>
         
     | 
| 
       224 
265 
     | 
    
         
             
            			tao:contains <%= s[:span_uri] %> ;
         
     | 
| 
       225 
266 
     | 
    
         
             
            		<% end -%>
         
     | 
| 
       226 
     | 
    
         
            -
            			tao:has_text <%= s[:text] 
     | 
| 
      
 267 
     | 
    
         
            +
            			tao:has_text "<%= rdf_literal_escape(s[:text]) %>" ;
         
     | 
| 
       227 
268 
     | 
    
         
             
            			tao:belongs_to <<%= s[:source_uri] %>> ;
         
     | 
| 
       228 
269 
     | 
    
         
             
            			tao:begins_at <%= s[:begin] %> ;
         
     | 
| 
       229 
270 
     | 
    
         
             
            			tao:ends_at <%= s[:end] %> .
         
     | 
| 
       230 
271 
     | 
    
         
             
            		<% end -%>
         
     | 
| 
       231 
272 
     | 
    
         
             
            	HEREDOC
         
     | 
| 
       232 
273 
     | 
    
         | 
| 
      
 274 
     | 
    
         
            +
            	# variable: namespaces
         
     | 
| 
       233 
275 
     | 
    
         
             
            	ERB_PREFIXES_TTL = <<~HEREDOC
         
     | 
| 
       234 
276 
     | 
    
         
             
            		@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
         
     | 
| 
       235 
277 
     | 
    
         
             
            		@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: tao_rdfizer
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version:  
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.11.2
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Jin-Dong Kim
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date:  
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2021-05-23 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       13 
13 
     | 
    
         
             
            description: It uses TAO (text annotation ontology) for representation of annotations
         
     | 
| 
       14 
14 
     | 
    
         
             
              to text.
         
     | 
| 
         @@ -40,7 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       40 
40 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       41 
41 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       42 
42 
     | 
    
         
             
            requirements: []
         
     | 
| 
       43 
     | 
    
         
            -
            rubygems_version: 3.0. 
     | 
| 
      
 43 
     | 
    
         
            +
            rubygems_version: 3.0.9
         
     | 
| 
       44 
44 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       45 
45 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       46 
46 
     | 
    
         
             
            summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
         
     |