scrappy 0.1.15 → 0.1.16

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.16 2011-02-09
2
+
3
+ * Added support for RDF abbreviated output by loading the kb's prefixes
4
+ * Fixed bug in wikitext <pre> formatting
5
+
1
6
  === 0.1.15 2011-02-04
2
7
 
3
8
  * Added literal output formats such as HTML or Wikitext
data/bin/scrappy CHANGED
@@ -143,10 +143,17 @@ Copyright
143
143
  open(cache_file) { |f| Marshal.load(f) }
144
144
  else
145
145
  # Load YARF files and cache kb
146
- data = Dir["#{data_folder}/*"].inject(RDF::Graph.new) { |graph, file| extension = file.split('.').last.to_sym; graph.merge(extension==:ignore ? RDF::Graph.new : RDF::Parser.parse(extension, open(file).read)) }
146
+ data = Dir["#{data_folder}/*"].inject(RDF::Graph.new) do |kb, file|
147
+ extension = file.split('.').last.to_sym
148
+ graph = RDF::Parser.parse(extension, open(file).read)
149
+ kb.ns.merge! graph.ns
150
+ kb.merge!(extension==:ignore ? RDF::Graph.new : graph)
151
+ kb
152
+ end
147
153
  open(cache_file, "w") { |f| Marshal.dump(data, f) }
148
154
  data
149
155
  end
156
+ RDF::QURI.ns.merge! Agent::Options.kb.ns
150
157
  end
151
158
  end
152
159
 
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
21
21
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
22
22
 
23
23
  module Scrappy
24
- VERSION = '0.1.15'
24
+ VERSION = '0.1.16'
25
25
  end
26
26
 
27
27
  # Require selectors
@@ -11,9 +11,13 @@ module Scrappy
11
11
  doc.search("h4").each {|n| n.replace(Nokogiri::XML::Text.new("==== #{n.text.strip} ====", n.document)) }
12
12
  doc.search("h5").each {|n| n.replace(Nokogiri::XML::Text.new("===== #{n.text.strip} =====", n.document)) }
13
13
  doc.search("b").each {|n| n.replace(Nokogiri::XML::Text.new("'''#{n.text.strip}'''", n.document)) }
14
+ doc.search("li li li li li").each {|n| n.replace(Nokogiri::XML::Text.new("***** #{n.text.strip}", n.document)) }
15
+ doc.search("li li li li").each {|n| n.replace(Nokogiri::XML::Text.new("**** #{n.text.strip}", n.document)) }
16
+ doc.search("li li li").each {|n| n.replace(Nokogiri::XML::Text.new("*** #{n.text.strip}", n.document)) }
17
+ doc.search("li li").each {|n| n.replace(Nokogiri::XML::Text.new("** #{n.text.strip}", n.document)) }
14
18
  doc.search("li").each {|n| n.replace(Nokogiri::XML::Text.new("* #{n.text.strip}", n.document)) }
15
19
  doc.search("ul").each {|n| n.replace(Nokogiri::XML::Text.new(n.text.strip, n.document)) }
16
- doc.search("pre, code").each {|n| n.replace(Nokogiri::XML::Text.new("<pre>\n#{n.text.strip}\n</pre>", n.document)) }
20
+ doc.search("pre, code").each {|n| n.replace(Nokogiri::XML::Text.new("<pre>#{n.text}</pre>", n.document)) }
17
21
  doc.search("p").each {|n| n.replace(Nokogiri::XML::Text.new("#{n.text.strip}\n", n.document)) }
18
22
  doc.text.strip
19
23
  when Node('sc:Html') then
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.15"
5
+ s.version = "0.1.16"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-02-04}
9
+ s.date = %q{2011-02-09}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 15
9
- version: 0.1.15
8
+ - 16
9
+ version: 0.1.16
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-04 00:00:00 +01:00
17
+ date: 2011-02-09 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency