rdf-rdfa 0.0.3 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ === 0.2.1
2
+ * Update for RDF 0.2.1
3
+
4
+ === 0.2.0
5
+ * Updates for RDF 0.2.0
6
+ * Use URI#intern instead of URI#new
7
+ * Change use of Graph#predicates and Graph#objects to use as enumerables
8
+
1
9
  === 0.0.3
2
10
  * Removed internal graph in Reader and implemented each_triple & each_statement to perform parsing
3
11
 
data/README.rdoc CHANGED
@@ -25,16 +25,14 @@ Instantiate a parser and parse source, specifying type and base-URL
25
25
  end
26
26
 
27
27
  == Dependencies
28
- * [RDF.rb](http://rubygems.org/gems/rdf) (>= 0.1.6)
28
+ * [RDF.rb](http://rubygems.org/gems/rdf) (>= 0.2.0)
29
29
  * [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.3.3)
30
- * [Patron](http://rubygems.org/gems/patron) (>= 0.4.6) -- For RDFa profiles
31
30
 
32
31
  == TODO
33
32
  * Add support for LibXML and REXML bindings, and use the best available
34
33
  * Consider a SAX-based parser for improved performance
35
34
  * Port SPARQL tests to native SPARQL processor, when one becomes available.
36
35
  * Add generic XHTML+RDFa Writer
37
- * Only parse profiles if Patron included, don't force otherwise
38
36
 
39
37
  == Resources:
40
38
  * RDF.rb[http://rdf.rubyforge.org/]
data/Rakefile CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'rubygems'
2
+ require 'yard'
2
3
 
3
4
  begin
4
5
  gem 'jeweler'
@@ -12,12 +13,14 @@ begin
12
13
  gemspec.email = "gregg@kellogg-assoc.com"
13
14
  gemspec.homepage = "http://github.com/gkellogg/rdf-rdfa"
14
15
  gemspec.authors = ["Gregg Kellogg"]
15
- gemspec.add_dependency('rdf', '>= 0.1.6')
16
+ gemspec.add_dependency('rdf', '>= 0.2.1')
16
17
  gemspec.add_dependency('nokogiri', '>= 1.3.3')
17
18
  gemspec.add_development_dependency('rspec')
18
- gemspec.add_development_dependency('rdf-spec')
19
- gemspec.add_development_dependency('activesupport', '>= 2.3.0')
20
- gemspec.extra_rdoc_files = %w(README.rdoc History.txt AUTHORS)
19
+ gemspec.add_development_dependency('rdf-spec', '>= 0.2.1')
20
+ gemspec.add_development_dependency('rdf-rdfxml', '>= 0.2.1')
21
+ gemspec.add_development_dependency('rdf-isomorphic')
22
+ gemspec.add_development_dependency('yard')
23
+ gemspec.extra_rdoc_files = %w(README.rdoc History.txt AUTHORS CONTRIBUTORS)
21
24
  end
22
25
  Jeweler::GemcutterTasks.new
23
26
  rescue LoadError
@@ -45,4 +48,24 @@ Spec::Rake::SpecTask.new("doc:spec") do |spec|
45
48
  spec.spec_opts = ["--format", "html:doc/spec.html"]
46
49
  end
47
50
 
51
+ YARD::Rake::YardocTask.new do |t|
52
+ t.files = %w(lib/**/*.rb README.rdoc History.txt AUTHORS CONTRIBUTORS) # optional
53
+ end
54
+
55
+ desc "Generate RDF Core Manifest.yml"
56
+ namespace :spec do
57
+ task :prepare do
58
+ $:.unshift(File.join(File.dirname(__FILE__), 'lib'))
59
+ require 'rdf/rdfa'
60
+ require 'spec/rdfa_helper'
61
+ require 'fileutils'
62
+
63
+ %w(xhtml html4 html5).each do |suite|
64
+ yaml = manifest_file = File.join(File.dirname(__FILE__), "spec", "#{suite}-manifest.yml")
65
+ FileUtils.rm_f(yaml)
66
+ RdfaHelper::TestCase.to_yaml(suite, yaml)
67
+ end
68
+ end
69
+ end
70
+
48
71
  task :default => :spec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.2.1
data/lib/rdf/rdfa.rb CHANGED
@@ -25,6 +25,8 @@ module RDF
25
25
  module RDFa
26
26
  require 'rdf/rdfa/format'
27
27
  require 'rdf/rdfa/vocab'
28
+ require 'rdf/rdfa/patches/literal_hacks'
29
+ require 'rdf/rdfa/patches/uri_hacks'
28
30
  autoload :Reader, 'rdf/rdfa/reader'
29
31
  autoload :VERSION, 'rdf/rdfa/version'
30
32
  end
@@ -0,0 +1,147 @@
1
+ # Use Nokogiri or LibXML when available, and REXML otherwise:
2
+ begin
3
+ require 'nokogiri'
4
+ rescue LoadError => e
5
+ begin
6
+ require 'libxml'
7
+ rescue LoadError => e
8
+ :rexml
9
+ end
10
+ end
11
+
12
+ module RDF; class Literal
13
+ ##
14
+ # An XML literal.
15
+ #
16
+ # @see http://www.w3.org/TR/rdf-concepts/#section-XMLLiteral
17
+ # @see http://www.w3.org/TR/rdfa-core/#s_xml_literals
18
+ # @since 0.2.1
19
+ class XML < Literal
20
+ ##
21
+ # @param [Object] value
22
+ # @option options [String] :lexical (nil)
23
+ # @option options [Hash] :namespaces (nil)
24
+ # @option options [Hash] :namespaces ({})
25
+ # @option options [Symbol] :language (nil)
26
+ # @option options [Symbol] :library (:nokogiri, :libxml, or :rexml)
27
+ def initialize(value, options = {})
28
+ options[:namespaces] ||= {}
29
+
30
+ @library = case options[:library]
31
+ when nil
32
+ case
33
+ when defined?(::Nokogiri) then :nokogiri
34
+ when defined?(::LibXML) then :libxml
35
+ else :rexml
36
+ end
37
+ when :nokogiri, :libxml, :rexml
38
+ options[:library]
39
+ else
40
+ raise ArgumentError.new("expected :rexml, :libxml or :nokogiri, but got #{options[:library].inspect}")
41
+ end
42
+
43
+ @datatype = options[:datatype] || DATATYPE
44
+ @string = options[:lexical] if options.has_key?(:lexical)
45
+ @object = parse_value(value, options)
46
+ @string = serialize_nodeset(@object)
47
+ end
48
+
49
+ ##
50
+ # Converts the literal into its canonical lexical representation.
51
+ #
52
+ # @return [Literal]
53
+ # @see http://www.w3.org/TR/xml-exc-c14n/
54
+ def canonicalize
55
+ # This is the opportunity to use exclusive canonicalization library
56
+ self
57
+ end
58
+
59
+ ##
60
+ # Returns the value as a string.
61
+ #
62
+ # @return [String]
63
+ def to_s
64
+ @string
65
+ end
66
+
67
+ private
68
+
69
+ def parse_value(value, options)
70
+ ns_hash = {}
71
+ options[:namespaces].each_pair do |prefix, uri|
72
+ attr = prefix.to_s.empty? ? "xmlns" : "xmlns:#{prefix}"
73
+ ns_hash[attr] = uri.to_s
74
+ end
75
+ ns_strs = []
76
+ ns_hash.each_pair {|a, u| ns_strs << "#{a}=\"#{u}\""}
77
+
78
+ case @library
79
+ when :nokogiri then parse_value_nokogiri(value, ns_strs, options[:language])
80
+ when :libxml then parse_value_libxml(value, ns_strs, options[:language])
81
+ when :rexml then parse_value_rexml(value, ns_strs, options[:language])
82
+ else value.to_s
83
+ end
84
+ end
85
+
86
+ def serialize_nodeset(object)
87
+ case @library
88
+ when :nokogiri then serialize_nodeset_nokogiri(object)
89
+ when :libxml then serialize_nodeset_libxml(object)
90
+ when :rexml then serialize_nodeset_rexml(object)
91
+ else object
92
+ end
93
+ end
94
+
95
+ # Nokogiri implementations
96
+ if defined?(::Nokogiri)
97
+ def parse_value_nokogiri(value, ns_strs, language)
98
+ return value if value.is_a?(Nokogiri::XML::NodeSet)
99
+ # Add inherited namespaces to the created root element so that they're inherited by sub-elements
100
+ elements = Nokogiri::XML::Document.parse("<foo #{ns_strs.join(" ")}>#{value.to_s}</foo>").root.children
101
+
102
+ elements.map do |c|
103
+ if c.is_a?(Nokogiri::XML::Element)
104
+ c = Nokogiri::XML.parse(c.dup.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS)).root
105
+ # Gather namespaces from self and descendant nodes
106
+ c.traverse do |n|
107
+ ns = n.namespace
108
+ next unless ns
109
+ prefix = ns.prefix ? "xmlns:#{ns.prefix}" : "xmlns"
110
+ c[prefix] = ns.href.to_s unless c.namespaces[prefix]
111
+ end
112
+
113
+ # Add language
114
+ if language && c["lang"].to_s.empty?
115
+ c["xml:lang"] = language
116
+ end
117
+ end
118
+ c
119
+ end
120
+ end
121
+
122
+ def serialize_nodeset_nokogiri(object)
123
+ object.map {|c| c.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS)}.join("")
124
+ end
125
+ end # Nokogiri
126
+
127
+ if defined?(::LibXML)
128
+ def parse_value_libxml(value, ns_strs, language)
129
+ # Fixme
130
+ end
131
+
132
+ def serialize_nodeset_libxml(object)
133
+ # Fixme
134
+ end
135
+ end # LibXML
136
+
137
+ # REXML
138
+ def parse_value_rexml(value, ns_strs, language)
139
+ # Fixme
140
+ end
141
+
142
+ def serialize_nodeset_rexml(object)
143
+ # Fixme
144
+ end
145
+
146
+ end unless defined?(::RDF::Literal::XML)# class XML
147
+ end; end
@@ -0,0 +1,19 @@
1
+ module RDF
2
+ class URI
3
+ ##
4
+ # Joins several URIs together.
5
+ #
6
+ # @param [Array<String, URI, #to_str>] uris
7
+ # @return [URI]
8
+ #
9
+ # GK -- don't add a "/" at the end of URIs, due to rdfcore/xmlbase/test002.rdf
10
+ def join(*uris)
11
+ result = @uri
12
+ uris.each do |uri|
13
+ # result.path += '/' unless result.path.match(/[\#\/]$/) || uri.to_s[0..0] == "#"
14
+ result = result.join(uri)
15
+ end
16
+ self.class.new(result)
17
+ end
18
+ end
19
+ end
@@ -21,8 +21,6 @@ module RDF::RDFa
21
21
  $},
22
22
  Regexp::EXTENDED)
23
23
 
24
- XML_LITERAL = RDF['XMLLiteral']
25
-
26
24
  # Host language, One of:
27
25
  # :xhtml_rdfa_1_0
28
26
  # :xhtml_rdfa_1_1
@@ -71,9 +69,9 @@ module RDF::RDFa
71
69
  @base = base
72
70
  @parent_subject = @base
73
71
  @parent_object = nil
74
- @uri_mappings = {}
75
72
  @incomplete_triples = []
76
73
  @language = nil
74
+ @uri_mappings = host_defaults.fetch(:uri_mappings, {})
77
75
  @term_mappings = host_defaults.fetch(:term_mappings, {})
78
76
  @default_voabulary = host_defaults.fetch(:voabulary, nil)
79
77
  end
@@ -97,11 +95,11 @@ module RDF::RDFa
97
95
  ##
98
96
  # Initializes the RDFa reader instance.
99
97
  #
100
- # @param [IO, File, String]:: input
101
- # @param [Hash{Symbol => Object}]:: options
102
- # <em>options[:debug]</em>:: Array to place debug messages
103
- # <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
104
- # <em>options[:base_uri]</em>:: Base URI to use for relative URIs.
98
+ # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, IO, File, String] input
99
+ # @option options [Array] :debug (nil) Array to place debug messages
100
+ # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
101
+ # @option options [#to_s] :base_uri (nil) Base URI to use for relative URIs.
102
+ # @return [reader]
105
103
  # @yield [reader]
106
104
  # @yieldparam [Reader] reader
107
105
  # @raise [RDF::ReaderError]:: Raises RDF::ReaderError if _strict_
@@ -109,7 +107,7 @@ module RDF::RDFa
109
107
  super do
110
108
  @debug = options[:debug]
111
109
  @strict = options[:strict]
112
- @base_uri = RDF::URI.new(options[:base_uri])
110
+ @base_uri = RDF::URI.intern(options[:base_uri])
113
111
  @@vocabulary_cache ||= {}
114
112
 
115
113
  @doc = case input
@@ -124,8 +122,6 @@ module RDF::RDFa
124
122
  end
125
123
  end
126
124
 
127
-
128
- # XXX Invoke the parser, and allow add_triple to make the callback?
129
125
  ##
130
126
  # Iterates the given block for each RDF statement in the input.
131
127
  #
@@ -150,6 +146,7 @@ module RDF::RDFa
150
146
  {
151
147
  :vocabulary => RDF::XHV.to_s,
152
148
  :prefix => "xhv",
149
+ :uri_mappings => {"xhv" => RDF::XHV.to_s}, # RDF::XHTML is wrong
153
150
  :term_mappings => %w(
154
151
  alternate appendix bookmark cite chapter contents copyright first glossary help icon index
155
152
  last license meta next p3pv1 prev role section stylesheet subsection start top up
@@ -179,6 +176,12 @@ module RDF::RDFa
179
176
 
180
177
  private
181
178
 
179
+ # Keep track of allocated BNodes
180
+ def bnode(value = nil)
181
+ @bnode_cache ||= {}
182
+ @bnode_cache[value.to_s] ||= RDF::Node.new(value)
183
+ end
184
+
182
185
  # Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
183
186
  def node_path(node)
184
187
  case node
@@ -221,7 +224,7 @@ module RDF::RDFa
221
224
  base = base_el.attributes['href']
222
225
  # Strip any fragment from base
223
226
  base = base.to_s.split("#").first
224
- @base_uri = RDF::URI.new(base)
227
+ @base_uri = RDF::URI.intern(base)
225
228
  add_debug(base_el, "parse_whole_doc: base='#{base}'")
226
229
  end
227
230
 
@@ -238,10 +241,17 @@ module RDF::RDFa
238
241
  # local list of URI mappings via @profile.
239
242
  # If @profile is present, its value is processed as defined in RDFa Profiles.
240
243
  element.attributes['profile'].to_s.split(/\s/).each do |profile|
241
- # Don't try to open ourselves!
242
- if @base_uri == profile
243
- add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
244
+ if node_path(element) == "/html/head"
245
+ # Don't try to open ourselves!
246
+ add_debug(element, "extract_mappings: skip head profile <#{profile}>")
247
+ next
248
+ elsif @@vocabulary_cache[profile]
249
+ add_debug(element, "extract_mappings: cached profile <#{profile}>")
244
250
  @@vocabulary_cache[profile]
251
+ elsif @base_uri.to_s == profile
252
+ # Don't try to open ourselves!
253
+ add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
254
+ next
245
255
  elsif @@vocabulary_cache.has_key?(profile)
246
256
  add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
247
257
  else
@@ -258,7 +268,8 @@ module RDF::RDFa
258
268
  old_debug, old_verbose, = $DEBUG, $verbose
259
269
  $DEBUG, $verbose = false, false
260
270
  # FIXME: format shouldn't need to be specified here
261
- p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => :rdfa)
271
+ p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => RDF::Format.for(profile) || :rdfa)
272
+ puts p_graph.inspect if old_debug
262
273
  $DEBUG, $verbose = old_debug, old_verbose
263
274
  p_graph.each_subject do |subject|
264
275
  # If one of the objects is not a Literal no mapping is created.
@@ -282,7 +293,7 @@ module RDF::RDFa
282
293
  # triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
283
294
  # mapping from the object literal of the rdfa:term predicate to the object literal of the
284
295
  # rdfa:uri predicate. Add or update this mapping in the local term mappings.
285
- tm[term.value] = RDF::URI.new(uri.value) if term
296
+ tm[term.value] = RDF::URI.intern(uri.value) if term
286
297
  end
287
298
  # FIXME: subject isn't in scope here
288
299
  #rescue RDF::ReaderError
@@ -317,7 +328,6 @@ module RDF::RDFa
317
328
  # Set mappings from @prefix
318
329
  # prefix is a whitespace separated list of prefix-name URI pairs of the form
319
330
  # NCName ':' ' '+ xs:anyURI
320
- # SPEC Confusion: prefix is forced to lower-case in @profile, but not specified here.
321
331
  mappings = element.attributes["prefix"].to_s.split(/\s+/)
322
332
  while mappings.length > 0 do
323
333
  prefix, uri = mappings.shift.downcase, mappings.shift
@@ -405,7 +415,8 @@ module RDF::RDFa
405
415
  else
406
416
  language
407
417
  end
408
- add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language}") if attrs['lang']
418
+ language = nil if language.to_s.empty?
419
+ add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language || 'nil'}") if attrs['lang']
409
420
 
410
421
  # rels and revs
411
422
  rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
@@ -439,7 +450,7 @@ module RDF::RDFa
439
450
  # From XHTML+RDFa 1.1:
440
451
  # if no URI is provided, then first check to see if the element is the head or body element.
441
452
  # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
442
- new_subject = RDF::URI.new(evaluation_context.base)
453
+ new_subject = RDF::URI.intern(evaluation_context.base)
443
454
  elsif element.attributes['typeof']
444
455
  new_subject = RDF::Node.new
445
456
  else
@@ -465,7 +476,7 @@ module RDF::RDFa
465
476
  # From XHTML+RDFa 1.1:
466
477
  # if no URI is provided, then first check to see if the element is the head or body element.
467
478
  # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
468
- new_subject = RDF::URI.new(evaluation_context.base)
479
+ new_subject = RDF::URI.intern(evaluation_context.base)
469
480
  elsif element.attributes['typeof']
470
481
  new_subject = RDF::Node.new
471
482
  else
@@ -528,18 +539,18 @@ module RDF::RDFa
528
539
 
529
540
  # the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
530
541
  type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
531
- if type and !type.empty? and (type_resource.to_s != XML_LITERAL.to_s)
542
+ if type and !type.empty? and (type_resource.to_s != RDF.XMLLiteral.to_s)
532
543
  # typed literal
533
544
  add_debug(element, "[Step 11] typed literal")
534
- current_object_literal = RDF::Literal.new(content || element.inner_text, :datatype => type_resource, :language => language)
545
+ current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :datatype => type_resource, :language => language)
535
546
  elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
536
547
  # plain literal
537
548
  add_debug(element, "[Step 11] plain literal")
538
- current_object_literal = RDF::Literal.new(content || element.inner_text, :language => language)
539
- elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == XML_LITERAL.to_s)
549
+ current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :language => language)
550
+ elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == RDF.XMLLiteral.to_s)
540
551
  # XML Literal
541
552
  add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
542
- current_object_literal = RDF::Literal.new(element.inner_html, :datatype => XML_LITERAL, :language => language, :namespaces => uri_mappings)
553
+ current_object_literal = RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
543
554
  recurse = false
544
555
  end
545
556
 
@@ -635,7 +646,7 @@ module RDF::RDFa
635
646
  add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
636
647
  else
637
648
  ## FIXME: throw exception if there is no base uri set?
638
- uri = RDF::URI.new(evaluation_context.base + value)
649
+ uri = RDF::URI.intern(RDF::URI.intern(evaluation_context.base).join(value))
639
650
  add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
640
651
  end
641
652
  uri
@@ -656,7 +667,7 @@ module RDF::RDFa
656
667
  options[:term_mappings][value.to_s.downcase]
657
668
  when options[:vocab]
658
669
  # Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
659
- RDF::URI.new(options[:vocab] + value)
670
+ RDF::URI.intern(options[:vocab] + value)
660
671
  else
661
672
  # Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
662
673
  nil
@@ -670,13 +681,13 @@ module RDF::RDFa
670
681
 
671
682
  # consider the bnode situation
672
683
  if prefix == "_"
673
- RDF::Node.new(reference)
684
+ bnode(reference)
674
685
  elsif curie.to_s.match(/^:/)
675
686
  # Default prefix
676
687
  if uri_mappings[""]
677
- RDF::URI.new(uri_mappings[""] + reference)
688
+ RDF::URI.intern(uri_mappings[""] + reference.to_s)
678
689
  elsif @host_defaults[:prefix]
679
- RDF::URI.new(@host_defaults[:prefix] + reference)
690
+ RDF::URI.intern(uri_mappings[@host_defaults[:prefix]] + reference.to_s)
680
691
  end
681
692
  elsif !curie.to_s.match(/:/)
682
693
  # No prefix, undefined (in this context, it is evaluated as a term elsewhere)
@@ -685,7 +696,7 @@ module RDF::RDFa
685
696
  # Prefixes always downcased
686
697
  ns = uri_mappings[prefix.to_s.downcase]
687
698
  if ns
688
- RDF::URI.new(ns +reference)
699
+ RDF::URI.intern(ns + reference.to_s)
689
700
  else
690
701
  add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
691
702
  nil