rdfa_parser 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,10 @@
1
+ === 0.1.5
2
+ * Gem is now deprecated, see [RDF::RDFa](http://github.com/gkellogg/rdf-rdfa)
3
+
4
+ === 0.1.4
5
+ * Reduce dependence on HTML namespace in RDFa parser.
6
+ * Add RDFa tests for HTML4 and HTML5
7
+
1
8
  === 0.1.3
2
9
  * Added rdfa-test-suite as sub-module and refactored RDFa tests to use them.
3
10
  * Fix bug in white-space separated attributes
@@ -4,12 +4,14 @@
4
4
 
5
5
  == DESCRIPTION:
6
6
 
7
+ This Gem is deprecated, see rdf-rdfa[http://github.com/gkellogg/rdf-rdfa] gem.
8
+
7
9
  RDFa parser written in pure Ruby. Yields each triple, or generate in-memory graph
8
10
 
9
11
  == FEATURES/PROBLEMS:
10
12
 
11
- * Most, but not all tests pass.
12
- * Tests performed by comparison to expected NTriples, not based on SPARQL query
13
+ * Fully compliant RDFa 1.0 parser
14
+ * RDFa tests use SPARQL for most tests due to Rasqal limitations. Other tests compare directly against N-triples
13
15
  * Ultimately, this should be merged in with the Reddy gem to become part of a comprehensive Ruby RDF implementation
14
16
  * Support libraries (Graph, Triple, and URIRef are used substantially intact, to facilitate a future merger)
15
17
 
data/Rakefile CHANGED
@@ -4,8 +4,8 @@ begin
4
4
  require 'jeweler'
5
5
  Jeweler::Tasks.new do |gemspec|
6
6
  gemspec.name = "rdfa_parser"
7
- gemspec.summary = "RDFa parser written in pure Ruby."
8
- gemspec.description = " Yields each triple, or generate in-memory graph"
7
+ gemspec.summary = "[Deprecated] RDFa parser written in pure Ruby."
8
+ gemspec.description = "This gem is no longer supported, please see http://rubygems.org/gems/rdf-rdfa"
9
9
  gemspec.email = "gregg@kellogg-assoc.com"
10
10
  gemspec.homepage = "http://github.com/gkellogg/rdfa_parser"
11
11
  gemspec.authors = ["Gregg Kellogg"]
@@ -20,33 +20,3 @@ begin
20
20
  rescue LoadError
21
21
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
22
22
  end
23
-
24
- require 'spec/rake/spectask'
25
- Spec::Rake::SpecTask.new(:spec) do |spec|
26
- spec.libs << 'lib' << 'spec'
27
- spec.spec_files = FileList['spec/**/*_spec.rb']
28
- end
29
-
30
- Spec::Rake::SpecTask.new(:rcov) do |spec|
31
- spec.libs << 'lib' << 'spec'
32
- spec.pattern = 'spec/**/*_spec.rb'
33
- spec.rcov = true
34
- end
35
-
36
- task :spec => :check_dependencies
37
-
38
- task :default => :spec
39
-
40
- require 'rake/rdoctask'
41
- Rake::RDocTask.new do |rdoc|
42
- if File.exist?('VERSION')
43
- version = File.read('VERSION')
44
- else
45
- version = RdfaParser::VERSION
46
- end
47
-
48
- rdoc.rdoc_dir = 'rdoc'
49
- rdoc.title = "rdfa_parser #{version}"
50
- rdoc.rdoc_files.include('README*', "History.txt")
51
- rdoc.rdoc_files.include('lib/**/*.rb')
52
- end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.5
@@ -72,9 +72,9 @@ module RdfaParser
72
72
  # XMLLiteral and String values are encoded using C-style strings with
73
73
  # non-printable ASCII characters escaped.
74
74
  def format_as_n3(content, lang)
75
- content = c_style(content.to_s)
75
+ content = escape(content.to_s)
76
76
  quoted_content = should_quote? ? "\"#{content}\"" : content
77
- "#{quoted_content}^^<#{value}>#{lang ? "@#{lang}" : ""}"
77
+ "#{quoted_content}^^<#{value}>"
78
78
  end
79
79
 
80
80
  def format_as_trix(content, lang)
@@ -140,10 +140,19 @@ module RdfaParser
140
140
  '/' => '/',
141
141
  } # :nodoc:
142
142
 
143
- # Convert a UTF8 encoded Ruby string _string_ to a C-style string, encoded with
143
+ # Convert a UTF8 encoded Ruby string _string_ to an escaped string, encoded with
144
144
  # UTF16 big endian characters as \U????, and return it.
145
+ #
146
+ # \\:: Backslash
147
+ # \':: Single quote
148
+ # \":: Double quote
149
+ # \n:: ASCII Linefeed
150
+ # \r:: ASCII Carriage Return
151
+ # \t:: ASCII Horizontal Tab
152
+ # \uhhhh:: character in BMP with Unicode value U+hhhh
153
+ # \U00hhhhhh:: character in plane 1-16 with Unicode value U+hhhhhh
145
154
  if String.method_defined?(:force_encoding)
146
- def c_style(string) # :nodoc:
155
+ def escape(string) # :nodoc:
147
156
  string << '' # XXX workaround: avoid buffer sharing
148
157
  string.force_encoding(Encoding::ASCII_8BIT)
149
158
  string.gsub!(/["\\\/\x0-\x1f]/) { MAP[$&] }
@@ -163,7 +172,7 @@ module RdfaParser
163
172
  string
164
173
  end
165
174
  else
166
- def c_style(string) # :nodoc:
175
+ def escape(string) # :nodoc:
167
176
  string = string.gsub(/["\\\/\x0-\x1f]/) { MAP[$&] }
168
177
  string.gsub!(/(
169
178
  (?:
@@ -178,7 +187,12 @@ module RdfaParser
178
187
  s.gsub!(/.{4}/n, '\\\\u\&')
179
188
  }
180
189
  string
181
- end
190
+ end
191
+ end
192
+
193
+ # Reverse operation of escape
194
+ def unescape(string)
195
+ string.gsub(/\\([\\\'\"nrt]|u\h{4}|U00\h[6])/) {MAP.invert[$&]}
182
196
  end
183
197
  end
184
198
 
@@ -188,7 +202,7 @@ module RdfaParser
188
202
  end
189
203
 
190
204
  def format_as_n3(content, lang)
191
- "\"#{c_style(content)}\"" + (lang ? "@#{lang}" : "")
205
+ "\"#{escape(content)}\"" + (lang ? "@#{lang}" : "")
192
206
  # Perform translation on value if it's typed
193
207
  end
194
208
 
@@ -229,7 +243,7 @@ module RdfaParser
229
243
  end
230
244
 
231
245
  def format_as_n3(content, lang)
232
- "\"#{c_style(content)}\"^^<#{value}>"
246
+ "\"#{escape(content)}\"^^<#{value}>"
233
247
  end
234
248
 
235
249
  def format_as_trix(content, lang)
@@ -66,6 +66,11 @@ module RdfaParser
66
66
  end
67
67
  end
68
68
 
69
+ def eql?(other)
70
+ @short == other.short && @uri == other.uri && @fragment == other.fragment
71
+ end
72
+ alias_method :==, :eql?
73
+
69
74
  # Output xmlns attribute name
70
75
  def xmlns_attr
71
76
  short.nil? ? "xmlns" : "xmlns:#{short}"
@@ -50,7 +50,9 @@ module RdfaParser
50
50
 
51
51
  # Create new parser instance. Options:
52
52
  # _graph_:: Graph to parse into, otherwise a new RdfaParser::Graph instance is created
53
+ # @deprecated Please use rdf-rdfa[http://github.com/gkellogg/rdf-rdfa] instead
53
54
  def initialize(options = {})
55
+ warn "[DEPRECATION] `rdfa_parser` gem is deprecated. Please use rdf-rb gem instead."
54
56
  options = {:graph => Graph.new}.merge(options)
55
57
  @debug = []
56
58
  BNode.reset # Start sequence anew
@@ -59,7 +61,7 @@ module RdfaParser
59
61
  @graph = options[:graph]
60
62
  end
61
63
 
62
- # Parse XHRML+RDFa document from a string or input stream to closure or graph.
64
+ # Parse XHTML+RDFa document from a string or input stream to closure or graph.
63
65
  # _base_ indicates the base URI of the document.
64
66
  #
65
67
  # Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
@@ -117,7 +119,7 @@ module RdfaParser
117
119
  # Parsing an RDFa document (this is *not* the recursive method)
118
120
  def parse_whole_document(doc, base)
119
121
  # find if the document has a base element
120
- base_el = doc.xpath('/html:html/html:head/html:base', @namespace.xmlns_hash).first
122
+ base_el = doc.css('html>head>base').first
121
123
  if (base_el)
122
124
  base = base_el.attributes['href']
123
125
  # Strip any fragment from base
@@ -261,6 +261,21 @@ describe "Literals: " do
261
261
  end
262
262
  end
263
263
 
264
+ describe "an n3 literal" do
265
+ {
266
+ "simple literal" => ["simple literal", nil, nil],
267
+ "backslash:\\" => ["backslash:\\\\", nil, nil],
268
+ "dquote:\"" => ["dquote:\\\"", nil, nil],
269
+ "newline:\n" => ["newline:\\n", nil, nil],
270
+ "return:\r" => ["return:\\r", nil, nil],
271
+ "tab:\t" => ["tab:\\t", nil, nil],
272
+ }.each_pair do |name, args|
273
+ specify "test #{name}" do
274
+ Literal.n3_encoded(*args).contents.should == name
275
+ end
276
+ end
277
+ end
278
+
264
279
  # it "build_from_language" do
265
280
  # english = Literal.build_from_language("Have a nice day")
266
281
  # english.encoding.should == "en"
@@ -1,4 +1,4 @@
1
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "<string>" .
2
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "Ben & Co." .
3
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "@" .
4
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "@" .
1
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity1> ">" .
2
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity2> "Ben & Co." .
3
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity3> "@" .
4
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity4> "@" .
@@ -11,6 +11,9 @@ module RdfaHelper
11
11
  BASE_MANIFEST_URL = "http://rdfa.digitalbazaar.com/test-suite/"
12
12
  BASE_TEST_CASE_URL = "#{BASE_MANIFEST_URL}test-cases/"
13
13
 
14
+ HTMLRE = Regexp.new('([0-9]{4,4})\.xhtml')
15
+ TCPATHRE = Regexp.compile('\$TCPATH')
16
+
14
17
  attr_accessor :about
15
18
  attr_accessor :name
16
19
  attr_accessor :contributor
@@ -98,8 +101,6 @@ module RdfaHelper
98
101
  end.compact.join("\n")
99
102
 
100
103
  namespaces.chop! # Remove trailing newline
101
- htmlre = Regexp.new('([0-9]{4,4})\.xhtml')
102
- tcpathre = Regexp.compile('\$TCPATH')
103
104
 
104
105
  case suite
105
106
  when "xhtml"
@@ -107,16 +108,16 @@ module RdfaHelper
107
108
  %(<?xml version="1.0" encoding="UTF-8"?>\n) +
108
109
  %(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">\n) +
109
110
  %(<html xmlns="http://www.w3.org/1999/xhtml" version="XHTML+RDFa 1.0"\n)
110
- head + "#{namespaces}>\n#{body.gsub(tcpathre, tcpath)}\n</html>"
111
+ head + "#{namespaces}>\n#{body.gsub(TCPATHRE, tcpath)}\n</html>"
111
112
  when "html4"
112
113
  head ="" +
113
114
  %(<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n) +
114
115
  %(<html version="XHTML+RDFa 1.0"\n)
115
- head + "#{namespaces}>\n#{body.gsub(tcpathre, tcpath).gsub(htmlre, '\1.html')}\n</html>"
116
+ head + "#{namespaces}>\n#{body.gsub(TCPATHRE, tcpath).gsub(HTMLRE, '\1.html')}\n</html>"
116
117
  when "html5"
117
118
  head = "<!DOCTYPE html>\n"
118
119
  head += namespaces.empty? ? %(<html version="HTML+RDFa 1.0">) : "<html\n#{namespaces}>"
119
- head + "\n#{body.gsub(tcpathre, tcpath).gsub(htmlre, '\1.html')}\n</html>"
120
+ head + "\n#{body.gsub(TCPATHRE, tcpath).gsub(HTMLRE, '\1.html')}\n</html>"
120
121
  else
121
122
  nil
122
123
  end
@@ -124,19 +125,17 @@ module RdfaHelper
124
125
 
125
126
  # Read in file, and apply modifications reference either .html or .xhtml
126
127
  def results
127
- htmlre = Regexp.new('([0-9]{4,4})\.xhtml')
128
- tcpathre = Regexp.compile('\$TCPATH')
129
-
130
128
  f = self.name + ".sparql"
131
- body = File.read(File.join(TEST_DIR, "tests", f)).gsub(tcpathre, tcpath)
129
+ body = File.read(File.join(TEST_DIR, "tests", f)).gsub(TCPATHRE, tcpath)
132
130
 
133
- suite == "xhtml" ? body : body.gsub(htmlre, '\1.html')
131
+ suite == "xhtml" ? body : body.gsub(HTMLRE, '\1.html')
134
132
  end
135
133
 
136
134
  def triples
137
- tcpathre = Regexp.compile('\$TCPATH')
138
135
  f = self.name + ".nt"
139
- File.read(File.join(NT_DIR, f)).gsub(tcpathre, tcpath)
136
+ body = File.read(File.join(NT_DIR, f)).gsub(TCPATHRE, tcpath)
137
+
138
+ suite == "xhtml" ? body : body.gsub(HTMLRE, '\1.html')
140
139
  end
141
140
 
142
141
  # Run test case, yields input for parser to create triples
@@ -185,7 +184,7 @@ module RdfaHelper
185
184
 
186
185
  @@test_cases = test_hash.values.map {|statements| TestCase.new(statements, suite)}.
187
186
  compact.
188
- sort_by{|t| t.about.is_a?(URI) ? t.about.to_s : "zzz"}
187
+ sort_by{|t| t.name}
189
188
  end
190
189
  end
191
190
  end
@@ -93,12 +93,12 @@ describe "RDFa parser" do
93
93
  end
94
94
 
95
95
  # W3C Test suite from http://www.w3.org/2006/07/SWD/RDFa/testsuite/
96
- %w(xhtml).each do |suite|
96
+ %w(xhtml html4 html5).each do |suite|
97
97
  describe "w3c #{suite} testcases" do
98
98
  describe "that are approved" do
99
99
  test_cases(suite).each do |t|
100
100
  next unless t.status == "approved"
101
- #next unless t.name =~ /0131/
101
+ #next unless t.name =~ /017\d/
102
102
  #puts t.inspect
103
103
  specify "test #{t.name}: #{t.title}#{", (negative test)" unless t.expectedResults}" do
104
104
  begin
@@ -106,11 +106,7 @@ describe "RDFa parser" do
106
106
  rdfa_parser.parse(rdfa_string, t.informationResourceInput)
107
107
  end
108
108
  rescue Spec::Expectations::ExpectationNotMetError => e
109
- if t.title =~ /XML/
110
- pending("XML Tests known to not work propery with Rasqal") { raise }
111
- else
112
- raise
113
- end
109
+ raise
114
110
  end
115
111
  end
116
112
  end
@@ -118,15 +114,15 @@ describe "RDFa parser" do
118
114
  describe "that are unreviewed" do
119
115
  test_cases(suite).each do |t|
120
116
  next unless t.status == "unreviewed"
121
- #next unless t.name =~ /0154/
117
+ #next unless t.name =~ /0092/
122
118
  #puts t.inspect
123
119
  specify "test #{t.name}: #{t.title}#{", (negative test)" unless t.expectedResults}" do
124
120
  begin
125
121
  t.run_test do |rdfa_string, rdfa_parser|
126
122
  rdfa_parser.parse(rdfa_string, t.informationResourceInput)
127
123
  end
128
- #rescue Spec::Expectations::ExpectationNotMetError => e
129
- # pending() { raise }
124
+ rescue Spec::Expectations::ExpectationNotMetError => e
125
+ pending() { raise }
130
126
  end
131
127
  end
132
128
  end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdfa_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ prerelease:
5
+ version: 0.1.5
5
6
  platform: ruby
6
7
  authors:
7
8
  - Gregg Kellogg
@@ -9,60 +10,65 @@ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
12
 
12
- date: 2009-11-12 00:00:00 -08:00
13
+ date: 2011-06-11 00:00:00 -07:00
13
14
  default_executable: rdfa_parser
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: addressable
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
20
21
  requirements:
21
22
  - - ">="
22
23
  - !ruby/object:Gem::Version
23
24
  version: 2.0.0
24
- version:
25
+ type: :runtime
26
+ version_requirements: *id001
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: nokogiri
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
30
32
  requirements:
31
33
  - - ">="
32
34
  - !ruby/object:Gem::Version
33
35
  version: 1.3.3
34
- version:
36
+ type: :runtime
37
+ version_requirements: *id002
35
38
  - !ruby/object:Gem::Dependency
36
39
  name: builder
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
40
43
  requirements:
41
44
  - - ">="
42
45
  - !ruby/object:Gem::Version
43
46
  version: 2.1.2
44
- version:
47
+ type: :runtime
48
+ version_requirements: *id003
45
49
  - !ruby/object:Gem::Dependency
46
50
  name: rspec
47
- type: :development
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
50
54
  requirements:
51
55
  - - ">="
52
56
  - !ruby/object:Gem::Version
53
57
  version: "0"
54
- version:
58
+ type: :development
59
+ version_requirements: *id004
55
60
  - !ruby/object:Gem::Dependency
56
61
  name: activesupport
57
- type: :development
58
- version_requirement:
59
- version_requirements: !ruby/object:Gem::Requirement
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
60
65
  requirements:
61
66
  - - ">="
62
67
  - !ruby/object:Gem::Version
63
68
  version: 2.3.0
64
- version:
65
- description: " Yields each triple, or generate in-memory graph"
69
+ type: :development
70
+ version_requirements: *id005
71
+ description: This gem is no longer supported, please see http://rubygems.org/gems/rdf-rdfa
66
72
  email: gregg@kellogg-assoc.com
67
73
  executables:
68
74
  - rdfa_parser
@@ -72,14 +78,12 @@ extra_rdoc_files:
72
78
  - History.txt
73
79
  - README.rdoc
74
80
  files:
75
- - .gitignore
76
81
  - .gitmodules
77
82
  - History.txt
78
83
  - README.rdoc
79
84
  - Rakefile
80
85
  - VERSION
81
86
  - bin/rdfa_parser
82
- - earl-xhtml1-approved.txt
83
87
  - lib/rdfa_parser.rb
84
88
  - lib/rdfa_parser/bnode.rb
85
89
  - lib/rdfa_parser/exceptions.rb
@@ -224,37 +228,28 @@ homepage: http://github.com/gkellogg/rdfa_parser
224
228
  licenses: []
225
229
 
226
230
  post_install_message:
227
- rdoc_options:
228
- - --charset=UTF-8
231
+ rdoc_options: []
232
+
229
233
  require_paths:
230
234
  - lib
231
235
  required_ruby_version: !ruby/object:Gem::Requirement
236
+ none: false
232
237
  requirements:
233
238
  - - ">="
234
239
  - !ruby/object:Gem::Version
235
240
  version: "0"
236
- version:
237
241
  required_rubygems_version: !ruby/object:Gem::Requirement
242
+ none: false
238
243
  requirements:
239
244
  - - ">="
240
245
  - !ruby/object:Gem::Version
241
246
  version: "0"
242
- version:
243
247
  requirements: []
244
248
 
245
249
  rubyforge_project:
246
- rubygems_version: 1.3.5
250
+ rubygems_version: 1.6.2
247
251
  signing_key:
248
252
  specification_version: 3
249
- summary: RDFa parser written in pure Ruby.
250
- test_files:
251
- - spec/bnode_spec.rb
252
- - spec/graph_spec.rb
253
- - spec/literal_spec.rb
254
- - spec/matchers.rb
255
- - spec/namespace_spec.rb
256
- - spec/rdfa_helper.rb
257
- - spec/rdfa_parser_spec.rb
258
- - spec/spec_helper.rb
259
- - spec/triple_spec.rb
260
- - spec/uriref_spec.rb
253
+ summary: "[Deprecated] RDFa parser written in pure Ruby."
254
+ test_files: []
255
+