scrappy 0.1.15 → 0.1.16

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.16 2011-02-09
2
+
3
+ * Added support to RDF abbreviated output by loading kb's prefixes
4
+ * Fixed bug in wikitext <pre> formatting
5
+
1
6
  === 0.1.15 2011-02-04
2
7
 
3
8
  * Added literal output formats such as HTML or Wikitext
data/bin/scrappy CHANGED
@@ -143,10 +143,17 @@ Copyright
143
143
  open(cache_file) { |f| Marshal.load(f) }
144
144
  else
145
145
  # Load YARF files and cache kb
146
- data = Dir["#{data_folder}/*"].inject(RDF::Graph.new) { |graph, file| extension = file.split('.').last.to_sym; graph.merge(extension==:ignore ? RDF::Graph.new : RDF::Parser.parse(extension, open(file).read)) }
146
+ data = Dir["#{data_folder}/*"].inject(RDF::Graph.new) do |kb, file|
147
+ extension = file.split('.').last.to_sym
148
+ graph = RDF::Parser.parse(extension, open(file).read)
149
+ kb.ns.merge! graph.ns
150
+ kb.merge!(extension==:ignore ? RDF::Graph.new : graph)
151
+ kb
152
+ end
147
153
  open(cache_file, "w") { |f| Marshal.dump(data, f) }
148
154
  data
149
155
  end
156
+ RDF::QURI.ns.merge! Agent::Options.kb.ns
150
157
  end
151
158
  end
152
159
 
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
21
21
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
22
22
 
23
23
  module Scrappy
24
- VERSION = '0.1.15'
24
+ VERSION = '0.1.16'
25
25
  end
26
26
 
27
27
  # Require selectors
@@ -11,9 +11,13 @@ module Scrappy
11
11
  doc.search("h4").each {|n| n.replace(Nokogiri::XML::Text.new("==== #{n.text.strip} ====", n.document)) }
12
12
  doc.search("h5").each {|n| n.replace(Nokogiri::XML::Text.new("===== #{n.text.strip} =====", n.document)) }
13
13
  doc.search("b").each {|n| n.replace(Nokogiri::XML::Text.new("'''#{n.text.strip}'''", n.document)) }
14
+ doc.search("li li li li li").each {|n| n.replace(Nokogiri::XML::Text.new("***** #{n.text.strip}", n.document)) }
15
+ doc.search("li li li li").each {|n| n.replace(Nokogiri::XML::Text.new("**** #{n.text.strip}", n.document)) }
16
+ doc.search("li li li").each {|n| n.replace(Nokogiri::XML::Text.new("*** #{n.text.strip}", n.document)) }
17
+ doc.search("li li").each {|n| n.replace(Nokogiri::XML::Text.new("** #{n.text.strip}", n.document)) }
14
18
  doc.search("li").each {|n| n.replace(Nokogiri::XML::Text.new("* #{n.text.strip}", n.document)) }
15
19
  doc.search("ul").each {|n| n.replace(Nokogiri::XML::Text.new(n.text.strip, n.document)) }
16
- doc.search("pre, code").each {|n| n.replace(Nokogiri::XML::Text.new("<pre>\n#{n.text.strip}\n</pre>", n.document)) }
20
+ doc.search("pre, code").each {|n| n.replace(Nokogiri::XML::Text.new("<pre>#{n.text}</pre>", n.document)) }
17
21
  doc.search("p").each {|n| n.replace(Nokogiri::XML::Text.new("#{n.text.strip}\n", n.document)) }
18
22
  doc.text.strip
19
23
  when Node('sc:Html') then
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.15"
5
+ s.version = "0.1.16"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-02-04}
9
+ s.date = %q{2011-02-09}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 15
9
- version: 0.1.15
8
+ - 16
9
+ version: 0.1.16
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-04 00:00:00 +01:00
17
+ date: 2011-02-09 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency