rdfa_parser 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,10 @@
1
+ === 0.1.5
2
+ * Gem is now deprecated, see [RDF::RDFa](http://github.com/gkellogg/rdf-rdfa)
3
+
4
+ === 0.1.4
5
+ * Reduce dependence on HTML namespace in RDFa parser.
6
+ * Add RDFa tests for HTML4 and HTML5
7
+
1
8
  === 0.1.3
2
9
  * Added rdfa-test-suite as sub-module and refactored RDFa tests to use them.
3
10
  * Fix bug in white-space separated attributes
@@ -4,12 +4,14 @@
4
4
 
5
5
  == DESCRIPTION:
6
6
 
7
+ This Gem is deprecated; see the rdf-rdfa[http://github.com/gkellogg/rdf-rdfa] gem.
8
+
7
9
  RDFa parser written in pure Ruby. Yields each triple, or generates an in-memory graph
8
10
 
9
11
  == FEATURES/PROBLEMS:
10
12
 
11
- * Most, but not all tests pass.
12
- * Tests performed by comparison to expected NTriples, not based on SPARQL query
13
+ * Fully compliant RDFa 1.0 parser
14
+ * RDFa tests use SPARQL for most tests due to Rasqal limitations. Other tests compare directly against N-Triples
13
15
  * Ultimately, this should be merged in with the Reddy gem to become part of a comprehensive Ruby RDF implementation
14
16
  * Support libraries (Graph, Triple, and URIRef) are used substantially intact, to facilitate a future merger
15
17
 
data/Rakefile CHANGED
@@ -4,8 +4,8 @@ begin
4
4
  require 'jeweler'
5
5
  Jeweler::Tasks.new do |gemspec|
6
6
  gemspec.name = "rdfa_parser"
7
- gemspec.summary = "RDFa parser written in pure Ruby."
8
- gemspec.description = " Yields each triple, or generate in-memory graph"
7
+ gemspec.summary = "[Deprecated] RDFa parser written in pure Ruby."
8
+ gemspec.description = "This gem is no longer supported, please see http://rubygems.org/gems/rdf-rdfa"
9
9
  gemspec.email = "gregg@kellogg-assoc.com"
10
10
  gemspec.homepage = "http://github.com/gkellogg/rdfa_parser"
11
11
  gemspec.authors = ["Gregg Kellogg"]
@@ -20,33 +20,3 @@ begin
20
20
  rescue LoadError
21
21
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
22
22
  end
23
-
24
- require 'spec/rake/spectask'
25
- Spec::Rake::SpecTask.new(:spec) do |spec|
26
- spec.libs << 'lib' << 'spec'
27
- spec.spec_files = FileList['spec/**/*_spec.rb']
28
- end
29
-
30
- Spec::Rake::SpecTask.new(:rcov) do |spec|
31
- spec.libs << 'lib' << 'spec'
32
- spec.pattern = 'spec/**/*_spec.rb'
33
- spec.rcov = true
34
- end
35
-
36
- task :spec => :check_dependencies
37
-
38
- task :default => :spec
39
-
40
- require 'rake/rdoctask'
41
- Rake::RDocTask.new do |rdoc|
42
- if File.exist?('VERSION')
43
- version = File.read('VERSION')
44
- else
45
- version = RdfaParser::VERSION
46
- end
47
-
48
- rdoc.rdoc_dir = 'rdoc'
49
- rdoc.title = "rdfa_parser #{version}"
50
- rdoc.rdoc_files.include('README*', "History.txt")
51
- rdoc.rdoc_files.include('lib/**/*.rb')
52
- end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.5
@@ -72,9 +72,9 @@ module RdfaParser
72
72
  # XMLLiteral and String values are encoded using C-style strings with
73
73
  # non-printable ASCII characters escaped.
74
74
  def format_as_n3(content, lang)
75
- content = c_style(content.to_s)
75
+ content = escape(content.to_s)
76
76
  quoted_content = should_quote? ? "\"#{content}\"" : content
77
- "#{quoted_content}^^<#{value}>#{lang ? "@#{lang}" : ""}"
77
+ "#{quoted_content}^^<#{value}>"
78
78
  end
79
79
 
80
80
  def format_as_trix(content, lang)
@@ -140,10 +140,19 @@ module RdfaParser
140
140
  '/' => '/',
141
141
  } # :nodoc:
142
142
 
143
- # Convert a UTF8 encoded Ruby string _string_ to a C-style string, encoded with
143
+ # Convert a UTF8 encoded Ruby string _string_ to an escaped string, encoded with
144
144
  # UTF16 big endian characters as \U????, and return it.
145
+ #
146
+ # \\:: Backslash
147
+ # \':: Single quote
148
+ # \":: Double quote
149
+ # \n:: ASCII Linefeed
150
+ # \r:: ASCII Carriage Return
151
+ # \t:: ASCII Horizontal Tab
152
+ # \uhhhh:: character in BMP with Unicode value U+hhhh
153
+ # \U00hhhhhh:: character in plane 1-16 with Unicode value U+hhhhhh
145
154
  if String.method_defined?(:force_encoding)
146
- def c_style(string) # :nodoc:
155
+ def escape(string) # :nodoc:
147
156
  string << '' # XXX workaround: avoid buffer sharing
148
157
  string.force_encoding(Encoding::ASCII_8BIT)
149
158
  string.gsub!(/["\\\/\x0-\x1f]/) { MAP[$&] }
@@ -163,7 +172,7 @@ module RdfaParser
163
172
  string
164
173
  end
165
174
  else
166
- def c_style(string) # :nodoc:
175
+ def escape(string) # :nodoc:
167
176
  string = string.gsub(/["\\\/\x0-\x1f]/) { MAP[$&] }
168
177
  string.gsub!(/(
169
178
  (?:
@@ -178,7 +187,12 @@ module RdfaParser
178
187
  s.gsub!(/.{4}/n, '\\\\u\&')
179
188
  }
180
189
  string
181
- end
190
+ end
191
+ end
192
+
193
+ # Reverse operation of escape
194
+ def unescape(string)
195
+ string.gsub(/\\([\\\'\"nrt]|u\h{4}|U00\h[6])/) {MAP.invert[$&]}
182
196
  end
183
197
  end
184
198
 
@@ -188,7 +202,7 @@ module RdfaParser
188
202
  end
189
203
 
190
204
  def format_as_n3(content, lang)
191
- "\"#{c_style(content)}\"" + (lang ? "@#{lang}" : "")
205
+ "\"#{escape(content)}\"" + (lang ? "@#{lang}" : "")
192
206
  # Perform translation on value if it's typed
193
207
  end
194
208
 
@@ -229,7 +243,7 @@ module RdfaParser
229
243
  end
230
244
 
231
245
  def format_as_n3(content, lang)
232
- "\"#{c_style(content)}\"^^<#{value}>"
246
+ "\"#{escape(content)}\"^^<#{value}>"
233
247
  end
234
248
 
235
249
  def format_as_trix(content, lang)
@@ -66,6 +66,11 @@ module RdfaParser
66
66
  end
67
67
  end
68
68
 
69
+ def eql?(other)
70
+ @short == other.short && @uri == other.uri && @fragment == other.fragment
71
+ end
72
+ alias_method :==, :eql?
73
+
69
74
  # Output xmlns attribute name
70
75
  def xmlns_attr
71
76
  short.nil? ? "xmlns" : "xmlns:#{short}"
@@ -50,7 +50,9 @@ module RdfaParser
50
50
 
51
51
  # Create new parser instance. Options:
52
52
  # _graph_:: Graph to parse into, otherwise a new RdfaParser::Graph instance is created
53
+ # @deprecated Please use rdf-rdfa[http://github.com/gkellogg/rdf-rdfa] instead
53
54
  def initialize(options = {})
55
+ warn "[DEPRECATION] `rdfa_parser` gem is deprecated. Please use rdf-rb gem instead."
54
56
  options = {:graph => Graph.new}.merge(options)
55
57
  @debug = []
56
58
  BNode.reset # Start sequence anew
@@ -59,7 +61,7 @@ module RdfaParser
59
61
  @graph = options[:graph]
60
62
  end
61
63
 
62
- # Parse XHRML+RDFa document from a string or input stream to closure or graph.
64
+ # Parse XHTML+RDFa document from a string or input stream to closure or graph.
63
65
  # _base_ indicates the base URI of the document.
64
66
  #
65
67
  # Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
@@ -117,7 +119,7 @@ module RdfaParser
117
119
  # Parsing an RDFa document (this is *not* the recursive method)
118
120
  def parse_whole_document(doc, base)
119
121
  # find if the document has a base element
120
- base_el = doc.xpath('/html:html/html:head/html:base', @namespace.xmlns_hash).first
122
+ base_el = doc.css('html>head>base').first
121
123
  if (base_el)
122
124
  base = base_el.attributes['href']
123
125
  # Strip any fragment from base
@@ -261,6 +261,21 @@ describe "Literals: " do
261
261
  end
262
262
  end
263
263
 
264
+ describe "an n3 literal" do
265
+ {
266
+ "simple literal" => ["simple literal", nil, nil],
267
+ "backslash:\\" => ["backslash:\\\\", nil, nil],
268
+ "dquote:\"" => ["dquote:\\\"", nil, nil],
269
+ "newline:\n" => ["newline:\\n", nil, nil],
270
+ "return:\r" => ["return:\\r", nil, nil],
271
+ "tab:\t" => ["tab:\\t", nil, nil],
272
+ }.each_pair do |name, args|
273
+ specify "test #{name}" do
274
+ Literal.n3_encoded(*args).contents.should == name
275
+ end
276
+ end
277
+ end
278
+
264
279
  # it "build_from_language" do
265
280
  # english = Literal.build_from_language("Have a nice day")
266
281
  # english.encoding.should == "en"
@@ -1,4 +1,4 @@
1
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "<string>" .
2
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "Ben & Co." .
3
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "@" .
4
- <$TCPATH/0115.xhtml> <http://www.example.com/entity> "@" .
1
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity1> ">" .
2
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity2> "Ben & Co." .
3
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity3> "@" .
4
+ <$TCPATH/0115.xhtml> <http://www.example.com/entity4> "@" .
@@ -11,6 +11,9 @@ module RdfaHelper
11
11
  BASE_MANIFEST_URL = "http://rdfa.digitalbazaar.com/test-suite/"
12
12
  BASE_TEST_CASE_URL = "#{BASE_MANIFEST_URL}test-cases/"
13
13
 
14
+ HTMLRE = Regexp.new('([0-9]{4,4})\.xhtml')
15
+ TCPATHRE = Regexp.compile('\$TCPATH')
16
+
14
17
  attr_accessor :about
15
18
  attr_accessor :name
16
19
  attr_accessor :contributor
@@ -98,8 +101,6 @@ module RdfaHelper
98
101
  end.compact.join("\n")
99
102
 
100
103
  namespaces.chop! # Remove trailing newline
101
- htmlre = Regexp.new('([0-9]{4,4})\.xhtml')
102
- tcpathre = Regexp.compile('\$TCPATH')
103
104
 
104
105
  case suite
105
106
  when "xhtml"
@@ -107,16 +108,16 @@ module RdfaHelper
107
108
  %(<?xml version="1.0" encoding="UTF-8"?>\n) +
108
109
  %(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">\n) +
109
110
  %(<html xmlns="http://www.w3.org/1999/xhtml" version="XHTML+RDFa 1.0"\n)
110
- head + "#{namespaces}>\n#{body.gsub(tcpathre, tcpath)}\n</html>"
111
+ head + "#{namespaces}>\n#{body.gsub(TCPATHRE, tcpath)}\n</html>"
111
112
  when "html4"
112
113
  head ="" +
113
114
  %(<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n) +
114
115
  %(<html version="XHTML+RDFa 1.0"\n)
115
- head + "#{namespaces}>\n#{body.gsub(tcpathre, tcpath).gsub(htmlre, '\1.html')}\n</html>"
116
+ head + "#{namespaces}>\n#{body.gsub(TCPATHRE, tcpath).gsub(HTMLRE, '\1.html')}\n</html>"
116
117
  when "html5"
117
118
  head = "<!DOCTYPE html>\n"
118
119
  head += namespaces.empty? ? %(<html version="HTML+RDFa 1.0">) : "<html\n#{namespaces}>"
119
- head + "\n#{body.gsub(tcpathre, tcpath).gsub(htmlre, '\1.html')}\n</html>"
120
+ head + "\n#{body.gsub(TCPATHRE, tcpath).gsub(HTMLRE, '\1.html')}\n</html>"
120
121
  else
121
122
  nil
122
123
  end
@@ -124,19 +125,17 @@ module RdfaHelper
124
125
 
125
126
  # Read in file, and apply modifications to reference either .html or .xhtml
126
127
  def results
127
- htmlre = Regexp.new('([0-9]{4,4})\.xhtml')
128
- tcpathre = Regexp.compile('\$TCPATH')
129
-
130
128
  f = self.name + ".sparql"
131
- body = File.read(File.join(TEST_DIR, "tests", f)).gsub(tcpathre, tcpath)
129
+ body = File.read(File.join(TEST_DIR, "tests", f)).gsub(TCPATHRE, tcpath)
132
130
 
133
- suite == "xhtml" ? body : body.gsub(htmlre, '\1.html')
131
+ suite == "xhtml" ? body : body.gsub(HTMLRE, '\1.html')
134
132
  end
135
133
 
136
134
  def triples
137
- tcpathre = Regexp.compile('\$TCPATH')
138
135
  f = self.name + ".nt"
139
- File.read(File.join(NT_DIR, f)).gsub(tcpathre, tcpath)
136
+ body = File.read(File.join(NT_DIR, f)).gsub(TCPATHRE, tcpath)
137
+
138
+ suite == "xhtml" ? body : body.gsub(HTMLRE, '\1.html')
140
139
  end
141
140
 
142
141
  # Run test case, yields input for parser to create triples
@@ -185,7 +184,7 @@ module RdfaHelper
185
184
 
186
185
  @@test_cases = test_hash.values.map {|statements| TestCase.new(statements, suite)}.
187
186
  compact.
188
- sort_by{|t| t.about.is_a?(URI) ? t.about.to_s : "zzz"}
187
+ sort_by{|t| t.name}
189
188
  end
190
189
  end
191
190
  end
@@ -93,12 +93,12 @@ describe "RDFa parser" do
93
93
  end
94
94
 
95
95
  # W3C Test suite from http://www.w3.org/2006/07/SWD/RDFa/testsuite/
96
- %w(xhtml).each do |suite|
96
+ %w(xhtml html4 html5).each do |suite|
97
97
  describe "w3c #{suite} testcases" do
98
98
  describe "that are approved" do
99
99
  test_cases(suite).each do |t|
100
100
  next unless t.status == "approved"
101
- #next unless t.name =~ /0131/
101
+ #next unless t.name =~ /017\d/
102
102
  #puts t.inspect
103
103
  specify "test #{t.name}: #{t.title}#{", (negative test)" unless t.expectedResults}" do
104
104
  begin
@@ -106,11 +106,7 @@ describe "RDFa parser" do
106
106
  rdfa_parser.parse(rdfa_string, t.informationResourceInput)
107
107
  end
108
108
  rescue Spec::Expectations::ExpectationNotMetError => e
109
- if t.title =~ /XML/
110
- pending("XML Tests known to not work propery with Rasqal") { raise }
111
- else
112
- raise
113
- end
109
+ raise
114
110
  end
115
111
  end
116
112
  end
@@ -118,15 +114,15 @@ describe "RDFa parser" do
118
114
  describe "that are unreviewed" do
119
115
  test_cases(suite).each do |t|
120
116
  next unless t.status == "unreviewed"
121
- #next unless t.name =~ /0154/
117
+ #next unless t.name =~ /0092/
122
118
  #puts t.inspect
123
119
  specify "test #{t.name}: #{t.title}#{", (negative test)" unless t.expectedResults}" do
124
120
  begin
125
121
  t.run_test do |rdfa_string, rdfa_parser|
126
122
  rdfa_parser.parse(rdfa_string, t.informationResourceInput)
127
123
  end
128
- #rescue Spec::Expectations::ExpectationNotMetError => e
129
- # pending() { raise }
124
+ rescue Spec::Expectations::ExpectationNotMetError => e
125
+ pending() { raise }
130
126
  end
131
127
  end
132
128
  end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdfa_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ prerelease:
5
+ version: 0.1.5
5
6
  platform: ruby
6
7
  authors:
7
8
  - Gregg Kellogg
@@ -9,60 +10,65 @@ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
12
 
12
- date: 2009-11-12 00:00:00 -08:00
13
+ date: 2011-06-11 00:00:00 -07:00
13
14
  default_executable: rdfa_parser
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: addressable
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
20
21
  requirements:
21
22
  - - ">="
22
23
  - !ruby/object:Gem::Version
23
24
  version: 2.0.0
24
- version:
25
+ type: :runtime
26
+ version_requirements: *id001
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: nokogiri
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
30
32
  requirements:
31
33
  - - ">="
32
34
  - !ruby/object:Gem::Version
33
35
  version: 1.3.3
34
- version:
36
+ type: :runtime
37
+ version_requirements: *id002
35
38
  - !ruby/object:Gem::Dependency
36
39
  name: builder
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
40
43
  requirements:
41
44
  - - ">="
42
45
  - !ruby/object:Gem::Version
43
46
  version: 2.1.2
44
- version:
47
+ type: :runtime
48
+ version_requirements: *id003
45
49
  - !ruby/object:Gem::Dependency
46
50
  name: rspec
47
- type: :development
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
50
54
  requirements:
51
55
  - - ">="
52
56
  - !ruby/object:Gem::Version
53
57
  version: "0"
54
- version:
58
+ type: :development
59
+ version_requirements: *id004
55
60
  - !ruby/object:Gem::Dependency
56
61
  name: activesupport
57
- type: :development
58
- version_requirement:
59
- version_requirements: !ruby/object:Gem::Requirement
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
60
65
  requirements:
61
66
  - - ">="
62
67
  - !ruby/object:Gem::Version
63
68
  version: 2.3.0
64
- version:
65
- description: " Yields each triple, or generate in-memory graph"
69
+ type: :development
70
+ version_requirements: *id005
71
+ description: This gem is no longer supported, please see http://rubygems.org/gems/rdf-rdfa
66
72
  email: gregg@kellogg-assoc.com
67
73
  executables:
68
74
  - rdfa_parser
@@ -72,14 +78,12 @@ extra_rdoc_files:
72
78
  - History.txt
73
79
  - README.rdoc
74
80
  files:
75
- - .gitignore
76
81
  - .gitmodules
77
82
  - History.txt
78
83
  - README.rdoc
79
84
  - Rakefile
80
85
  - VERSION
81
86
  - bin/rdfa_parser
82
- - earl-xhtml1-approved.txt
83
87
  - lib/rdfa_parser.rb
84
88
  - lib/rdfa_parser/bnode.rb
85
89
  - lib/rdfa_parser/exceptions.rb
@@ -224,37 +228,28 @@ homepage: http://github.com/gkellogg/rdfa_parser
224
228
  licenses: []
225
229
 
226
230
  post_install_message:
227
- rdoc_options:
228
- - --charset=UTF-8
231
+ rdoc_options: []
232
+
229
233
  require_paths:
230
234
  - lib
231
235
  required_ruby_version: !ruby/object:Gem::Requirement
236
+ none: false
232
237
  requirements:
233
238
  - - ">="
234
239
  - !ruby/object:Gem::Version
235
240
  version: "0"
236
- version:
237
241
  required_rubygems_version: !ruby/object:Gem::Requirement
242
+ none: false
238
243
  requirements:
239
244
  - - ">="
240
245
  - !ruby/object:Gem::Version
241
246
  version: "0"
242
- version:
243
247
  requirements: []
244
248
 
245
249
  rubyforge_project:
246
- rubygems_version: 1.3.5
250
+ rubygems_version: 1.6.2
247
251
  signing_key:
248
252
  specification_version: 3
249
- summary: RDFa parser written in pure Ruby.
250
- test_files:
251
- - spec/bnode_spec.rb
252
- - spec/graph_spec.rb
253
- - spec/literal_spec.rb
254
- - spec/matchers.rb
255
- - spec/namespace_spec.rb
256
- - spec/rdfa_helper.rb
257
- - spec/rdfa_parser_spec.rb
258
- - spec/spec_helper.rb
259
- - spec/triple_spec.rb
260
- - spec/uriref_spec.rb
253
+ summary: "[Deprecated] RDFa parser written in pure Ruby."
254
+ test_files: []
255
+