dmoz_sax_doc 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ dmoz_sax_doc (0.0.2)
5
+ json (~> 1.7)
6
+ nokogiri (~> 1.5)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ diff-lcs (1.2.1)
12
+ json (1.7.7)
13
+ nokogiri (1.5.6)
14
+ rspec (2.13.0)
15
+ rspec-core (~> 2.13.0)
16
+ rspec-expectations (~> 2.13.0)
17
+ rspec-mocks (~> 2.13.0)
18
+ rspec-core (2.13.0)
19
+ rspec-expectations (2.13.0)
20
+ diff-lcs (>= 1.1.3, < 2.0)
21
+ rspec-mocks (2.13.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ dmoz_sax_doc!
28
+ rspec (~> 2.12)
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # DMOZ SAX Documents
2
+
3
+ This gem provides a StructureDocument and a ContentDocument, both subclasses of Nokogiri::XML::SAX::Document, for parsing the content.rdf.u8 and structure.rdf.u8 files available from dmoz.org. This gem is not affiliated with the DMOZ project.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'dmoz_sax_doc'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install dmoz_sax_doc
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Open Directory License
24
+
25
+ This gem makes use of snippets of the DMOZ RDF content available from dmoz.org as test files. DMOZ open directory project is [licensed](http://www.dmoz.org/license.html) under [Creative Commons Attribution 3.0 Unported](http://creativecommons.org/licenses/by/3.0/).
26
+
27
+ <table>
28
+ <tr align="center">
29
+ <td>Help build the largest human-edited directory on the web.</td>
30
+ </tr>
31
+ <tr align="center">
32
+ <td>
33
+ <a href="/cgi-bin/add.cgi?where=Top">Submit a Site</a> -
34
+ <a href="/about.html"><b>Open Directory Project</b></a> -
35
+ <a href="/cgi-bin/apply.cgi?where=Top">Become an Editor</a>
36
+ </td></tr>
37
+ </table>
38
+
39
+
40
+ ## Contributing
41
+
42
+ 1. Fork it
43
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
44
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
45
+ 4. Push to the branch (`git push origin my-new-feature`)
46
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+ RSpec::Core::RakeTask.new('spec')
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'dmoz_sax/version'
5
+
6
# Gem specification for dmoz_sax_doc. The version comes from
# DmozSax::VERSION, which must already be loaded when this block runs.
Gem::Specification.new do |gem|
  gem.name          = "dmoz_sax_doc"
  gem.version       = DmozSax::VERSION
  gem.authors       = ["Galen Palmer"]
  gem.email         = ["palmergs@gmail.com"]
  gem.description   = %q{Use a SAX parser to visit either the structure.u8 or content.u8 DMOZ files.}
  # Fixed typo: the gem handles the structure *or* the content file.
  gem.summary       = %q{SAX visitor for DMOZ structure or content files.}
  gem.homepage      = "https://github.com/palmergs/dmoz_sax_doc"

  # Package whatever `git ls-files` prints, one path per line.
  gem.files         = `git ls-files`.split($/)
  # Executables are the base names of packaged files under bin/.
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  # Test files are the packaged files under test/, spec/ or features/.
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'nokogiri', '~> 1.5'
  gem.add_dependency 'json', '~> 1.7'

  gem.add_development_dependency 'rspec', '~> 2.12'
end
@@ -0,0 +1,9 @@
1
module DmozSax
  # An alias entry: a parsed path together with a title.
  class Alias
    attr_accessor :path, :title

    # Parses +path_str+ into a DmozSax::Path and uses the name reported by
    # that path as the initial title.
    def initialize(path_str)
      parsed = DmozSax::Path.new(path_str)
      @path  = parsed
      @title = parsed.name
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+
3
module DmozSax
  # SAX handler for a DMOZ content file.
  #
  # Builds a DmozSax::Topic for every <Topic> element and a
  # DmozSax::ExternalPage for every <ExternalPage> element. When an element
  # closes, the finished object is passed to +on_topic+ or +on_external_page+
  # (any object responding to #call). A callback left nil is skipped.
  class ContentDocument < Nokogiri::XML::SAX::Document

    attr_accessor :on_topic, :on_external_page
    attr_accessor :name_parser, :time_parser

    def initialize
      super

      @name_parser = NameParser.new
      @time_parser = TimeParser.new
    end

    # Appends text to the current buffer, so text delivered in several
    # chunks ends up concatenated.
    def characters(string)
      @buffer ||= ""
      @buffer << string
    end

    # Starts a new text buffer for every element and creates the object for
    # container elements. The value of the first attribute is used as the
    # topic path, link URL or page URL respectively.
    def start_element(name, attributes = [])
      @buffer = ""
      @name = name # not read anywhere in this class

      case name
      when 'Topic'
        # Clear the category id so a topic without <catid> does not inherit
        # the id of the previous topic.
        @cid = nil
        @topic = DmozSax::Topic.new(attributes[0][1])
      when /^link/
        @topic.links << attributes[0][1]
      when 'ExternalPage'
        # Clear every per-page field, not only priority and time; otherwise a
        # page missing a title, description or topic would silently reuse the
        # values of the page before it.
        @title, @description, @path = nil, nil, nil
        @priority = 0
        @time = nil
        @external = DmozSax::ExternalPage.new(attributes[0][1])
      end
    end

    # Stores the buffered text of leaf elements and, when a container element
    # closes, copies the collected values onto the object and fires the
    # matching callback.
    def end_element(name)
      case name
      when 'catid'
        @cid = @buffer.to_i
      when 'd:Description'
        @description = @buffer.strip
      when 'd:Title'
        @title = @buffer.strip.gsub('_', ' ')
      when 'Topic'
        @topic.cid = @cid
        @on_topic.call(@topic) unless @on_topic.nil?
      when 'topic'
        # Lower-case <topic> is the category path of the enclosing page.
        @path = DmozSax::Path.new(@buffer)
      when 'mediadate'
        @time = @time_parser.time_from(@buffer)
      when 'priority'
        @priority = @buffer.to_i
      when 'ExternalPage'
        @external.priority = @priority
        @external.title = @title
        @external.description = @description
        @external.path = @path
        @external.time = @time
        @on_external_page.call(@external) unless @on_external_page.nil?
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module DmozSax
  # Plain value holder for one external page entry.
  class ExternalPage
    attr_accessor :url, :path, :title, :description, :priority, :time

    # Stores +url+; every other field starts as nil except priority,
    # which starts at 0.
    def initialize(url)
      @url         = url
      @path        = nil
      @title       = nil
      @description = nil
      @priority    = 0
      @time        = nil
    end
  end
end
@@ -0,0 +1,8 @@
1
module DmozSax
  # Extracts the numeric suffix from element names such as "narrow1".
  class NameParser
    # Letters, underscores or hyphens followed by an optional run of digits,
    # anchored to the whole string.
    LEVEL_PATTERN = /\A([A-Za-z_-]+)([\d]+)?\z/

    # Returns the trailing number of +name+ as an Integer.
    #
    # Returns 0 when the name has no numeric suffix, and also when the name
    # does not fit the pattern at all (nil, empty, or containing other
    # characters such as ':'), instead of raising on the failed match.
    def level_from(name)
      match = LEVEL_PATTERN.match(name.to_s)
      match ? match[2].to_i : 0
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'delegate'
2
+
3
module DmozSax
  # A category path that behaves like a frozen Array of its segments.
  #
  # The input has the form "Name:Top/Seg/Seg" or just "Top/Seg/Seg".
  # Underscores are replaced by spaces, a leading 'Top' segment is dropped,
  # and segments consisting of one upper-case ASCII letter are removed.
  class Path < DelegateClass(Array)

    # A segment that is exactly one upper-case letter, matched against the
    # whole segment rather than against a single line of it.
    INDEX_SEGMENT = /\A[A-Z]\z/

    attr_reader :name, :level

    # str   - the raw path; nil is treated like an empty string.
    # level - converted with #to_i and exposed as #level.
    #
    # #name is only set when the input splits on ':' into exactly two parts.
    def initialize(str, level = 0)
      resource = str.to_s.gsub('_', ' ').split(':')

      @name  = resource.first if resource.length == 2
      @path  = resource.empty? ? [] : segments_from(resource.last)
      @level = level.to_i

      # The delegate target is frozen, so mutating calls made through the
      # path raise.
      super(@path.freeze)
    end

    # Returns an unfrozen copy of the segments.
    def to_a
      @path.dup
    end

    # Returns the segments joined with '/'.
    def to_s
      @path.join('/')
    end

    private

    # Splits +raw+ on '/', removes single-letter segments and then drops a
    # leading 'Top'.
    def segments_from(raw)
      segments = raw.split('/').reject { |segment| segment =~ INDEX_SEGMENT }
      segments.shift if segments.first == 'Top'
      segments
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+
3
module DmozSax
  # SAX handler for a DMOZ structure file.
  #
  # Builds a DmozSax::Topic for every <Topic> element and a DmozSax::Alias
  # for every <Alias> element. When an element closes, the finished object is
  # passed to +on_topic+ or +on_alias+ (any object responding to #call).
  # A callback left nil is skipped.
  class StructureDocument < Nokogiri::XML::SAX::Document

    attr_accessor :on_topic, :on_alias
    attr_accessor :name_parser, :time_parser

    def initialize
      super

      @name_parser = NameParser.new
      @time_parser = TimeParser.new
    end

    # Appends text to the current buffer, so text delivered in several
    # chunks ends up concatenated.
    def characters(string)
      @buffer ||= ""
      @buffer << string
    end

    # Starts a new text buffer for every element. The value of the first
    # attribute is used as the path of the topic, alias or relation.
    def start_element(name, attributes = [])
      @buffer = ""

      case name
      when 'Topic'
        # Clear everything copied onto the topic when it closes. @time is
        # included so a topic without <lastUpdate> does not inherit the
        # timestamp of the previous topic.
        @cid, @description, @title, @time = nil, nil, nil, nil
        @topic = DmozSax::Topic.new(attributes[0][1])
      when 'Alias'
        @alias = DmozSax::Alias.new(attributes[0][1])
      when 'Target'
        @path = attributes[0][1] # stored but not read anywhere in this class
      when 'altlang'
        @topic.alt_langs << DmozSax::Path.new(attributes[0][1])
      when 'related'
        @topic.related << DmozSax::Path.new(attributes[0][1])
      when /^narrow/
        # The numeric suffix of the element name becomes the path's level.
        @topic.narrows << DmozSax::Path.new(attributes[0][1], @name_parser.level_from(name))
      when /^symbolic/
        @topic.symbolics << DmozSax::Path.new(attributes[0][1], @name_parser.level_from(name))
      end
    end

    # Stores the buffered text of leaf elements and, when a container element
    # closes, fires the matching callback.
    def end_element(name)
      case name
      when 'catid'
        @cid = @buffer.to_i
      when 'd:Description'
        @description = @buffer.strip
      when 'd:Title'
        @title = @buffer.strip.gsub('_', ' ')
      when 'lastUpdate'
        @time = @time_parser.time_from(@buffer)
      when 'Alias'
        @on_alias.call(@alias) unless @on_alias.nil?
      when 'Topic'
        @topic.cid = @cid
        @topic.title = @title
        @topic.description = @description
        @topic.time = @time
        @on_topic.call(@topic) unless @on_topic.nil?
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
module DmozSax
  # Turns timestamps such as "2010-08-16 06:05:15" into UTC Time objects.
  class TimeParser
    # Reads up to six runs of digits from +string+ (year, month, day, hour,
    # minute, second) and returns the corresponding Time in UTC.
    #
    # Scanning for digit runs, rather than splitting on single separator
    # characters, means surrounding whitespace and doubled separators no
    # longer shift the fields. Returns nil when +string+ is nil or contains
    # no digits. Out-of-range values still raise from Time.utc.
    def time_from(string)
      fields = string.to_s.scan(/\d+/).first(6).map(&:to_i)
      return nil if fields.empty?

      Time.utc(*fields)
    end
  end
end
@@ -0,0 +1,11 @@
1
module DmozSax
  # One topic: its parsed path, scalar details and lists of related entries.
  class Topic
    attr_accessor :path, :cid, :title, :description, :time,
                  :narrows, :symbolics, :related, :alt_langs, :links

    # Parses +path_str+ into a DmozSax::Path and gives every list attribute
    # its own empty Array. The category id starts as nil.
    def initialize(path_str)
      @path      = DmozSax::Path.new(path_str)
      @narrows   = []
      @symbolics = []
      @related   = []
      @alt_langs = []
      @links     = []
      @cid       = nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module DmozSax
  # Release number of the gem as a dotted string.
  VERSION = '0.0.2'
end
data/lib/dmoz_sax.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "dmoz_sax/version"
2
+ require "dmoz_sax/name_parser"
3
+ require "dmoz_sax/time_parser"
4
+ require "dmoz_sax/path"
5
+ require "dmoz_sax/alias"
6
+ require "dmoz_sax/topic"
7
+ require "dmoz_sax/external_page"
8
+ require "dmoz_sax/structure_document"
9
+ require "dmoz_sax/content_document"
10
+
11
+ module DmozSax
12
+
13
+ end
@@ -0,0 +1,6 @@
1
+ require 'spec_helper'
2
+
3
+ describe DmozSax::Alias do
4
+
5
+
6
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
describe DmozSax::ContentDocument do
  #it 'can parse a real content.rdf.u8 document' do
  #  parser = Nokogiri::XML::SAX::Parser.new(DmozSax::ContentDocument.new)
  #  parser.parse(File.open('/opt/data/DMOZ/content.rdf.u8'))
  #end

  it 'can parse a sample content.rdf.u8 document' do

    topics = []
    pages = []

    document = DmozSax::ContentDocument.new
    document.on_topic = lambda { |t| topics << t }
    document.on_external_page = lambda { |t| pages << t }

    parser = Nokogiri::XML::SAX::Parser.new(document)
    # Block form closes the sample file once parsing is done.
    File.open('spec/samples/content_sample.rdf.u8') { |io| parser.parse(io) }

    # The sample holds three topics; the last one carries seven links.
    topics.count.should == 3
    topics[0].cid.should == 1
    topics[2].path.should == ['Arts', 'Animation']
    topics[2].cid.should == 423945
    topics[2].links.count.should == 7
    topics[2].links.first.should == 'http://www.awn.com/'

    # The sample holds four external pages; only the first has a priority.
    pages.count.should == 4
    pages[0].url.should == 'http://www.awn.com/'
    pages[0].title.should == 'Animation World Network'
    pages[0].description.should include 'international animation community'
    pages[0].priority.should == 1
    pages[0].path.should == ['Arts', 'Animation']
    pages[1].priority.should == 0
    pages[1].title.should == 'About.com: Animation Guide'
  end
end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+
3
+ describe DmozSax::ExternalPage do
4
+
5
+ end
data/spec/path_spec.rb ADDED
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
describe DmozSax::Path do
  it "takes a / delimited string in its initializer" do
    path = DmozSax::Path.new('This/Topic/Path')
    path.to_a.should == ['This','Topic','Path']
  end

  it "removes the 'Top' category and English index categories (e.g. 'a' to 'z')" do

    ('A'..'Z').each do |char|
      path = DmozSax::Path.new("Top/This/Topic/#{ char }/Path")
      path.to_a.should == ['This','Topic','Path']
    end
  end

  it "may optionally be preceeded by a name or identifier" do
    path = DmozSax::Path.new("Sample_Directory:Top/This/Topic/Path")
    path.name.should == 'Sample Directory'
    path.to_a.should == ['This','Topic','Path']
  end

  context "as an immutable array" do

    it "supports enumeration methods" do
      path = DmozSax::Path.new 'This/Topic/Path'
      path.length.should == 3
      path.count.should == 3
      path.size.should == 3

      path.each do |a| a.should_not be_nil end
      path.map {|a| a.downcase}.should == ['this','topic','path']
      path.inject(0) {|i,a| i + a.length}.should == 13
    end

    it "throws exceptions if modification attempted" do
      # Build the path inside the example. Previously `path` was undefined
      # here, so the expectation was satisfied by a NameError and never
      # exercised the frozen array.
      path = DmozSax::Path.new 'This/Topic/Path'
      expect { path[0] = 'Bob' }.to raise_error(RuntimeError)
    end
  end
end
@@ -0,0 +1,41 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <RDF xmlns:r="http://www.w3.org/TR/RDF/" xmlns:d="http://purl.org/dc/elements/1.0/" xmlns="http://dmoz.org/rdf/">
3
+ <!-- Generated at 2013-01-27 00:03:25 EST from DMOZ 2.0 -->
4
+ <Topic r:id="">
5
+ <catid>1</catid>
6
+ </Topic>
7
+ <Topic r:id="Top/Arts">
8
+ <catid>381773</catid>
9
+ </Topic>
10
+ <Topic r:id="Top/Arts/Animation">
11
+ <catid>423945</catid>
12
+ <link1 r:resource="http://www.awn.com/"></link1>
13
+ <link r:resource="http://animation.about.com/"></link>
14
+ <link r:resource="http://www.toonhound.com/"></link>
15
+ <link r:resource="http://enculturation.gmu.edu/2_1/pisters.html"></link>
16
+ <link r:resource="http://www.digitalmediafx.com/Features/animationhistory.html"></link>
17
+ <link r:resource="http://www.spark-online.com/august00/media/romano.html"></link>
18
+ <link r:resource="http://www.animated-divots.net/"></link>
19
+ </Topic>
20
+ <ExternalPage about="http://www.awn.com/">
21
+ <d:Title>Animation World Network</d:Title>
22
+ <d:Description>Provides information resources to the international animation community. Features include searchable database archives, monthly magazine, web animation guide, the Animation Village, discussion forums and other useful resources.</d:Description>
23
+ <priority>1</priority>
24
+ <topic>Top/Arts/Animation</topic>
25
+ </ExternalPage>
26
+ <ExternalPage about="http://animation.about.com/">
27
+ <d:Title>About.com: Animation Guide</d:Title>
28
+ <d:Description>Keep up with developments in online animation for all skill levels. Download tools, and seek inspiration from online work.</d:Description>
29
+ <topic>Top/Arts/Animation</topic>
30
+ </ExternalPage>
31
+ <ExternalPage about="http://www.toonhound.com/">
32
+ <d:Title>Toonhound</d:Title>
33
+ <d:Description>British cartoon, animation and comic strip creations - links, reviews and news from the UK.</d:Description>
34
+ <topic>Top/Arts/Animation</topic>
35
+ </ExternalPage>
36
+ <ExternalPage about="http://enculturation.gmu.edu/2_1/pisters.html">
37
+ <d:Title>Enculturation: From Mouse to Mouse: Overcoming Information</d:Title>
38
+ <d:Description>Essay by Patricia Pisters on the animated image and its changing relationship with the cinematic image.</d:Description>
39
+ <topic>Top/Arts/Animation</topic>
40
+ </ExternalPage>
41
+ </RDF>
@@ -0,0 +1,48 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <RDF xmlns:r="http://www.w3.org/TR/RDF/" xmlns:d="http://purl.org/dc/elements/1.0/" xmlns="http://dmoz.org/rdf/">
3
+ <!--
4
+
5
+ DMOZ Content is licensed under the Creative Commons
6
+ Attribution 3.0 Unported (CC BY 3.0)
7
+ ( http://creativecommons.org/licenses/by/3.0/ )
8
+
9
+
10
+ -->
11
+ <Topic r:id="Top/World">
12
+ <catid>2</catid>
13
+ <d:Title>Top</d:Title>
14
+ <lastUpdate>2010-08-16 06:05:15</lastUpdate>
15
+ <d:Description></d:Description>
16
+ <narrow r:resource="Top/Arts"></narrow>
17
+ <related r:resource="Kids_and_Teens"></related>
18
+ </Topic>
19
+ <Topic r:id="Top/Arts">
20
+ <catid>381773</catid>
21
+ <d:Title>Arts</d:Title>
22
+ <lastUpdate>2011-08-06 17:20:44</lastUpdate>
23
+ <d:Description>&lt;img src="/img/moz/mzcolor.gif" alt="Image from Mozilla museum: Mozilla as an Artist" width="128" height="120" align="right"&gt; &lt;p&gt;The ODP &lt;b&gt;Arts&lt;/b&gt; category contains English language sites about art, or "the use of skill and imagination in the creation of aesthetic objects, environments, or experiences that can be shared with others." This includes the "liberal arts," concerned with skill of expression in language, speech, and reasoning, and the "fine arts," concerned with affecting aesthetics directly, and especially affecting the sense of beauty. &lt;small&gt;(Quotes and paraphrases from &lt;a href="http://www.britannica.com/"&gt;Britannica.com&lt;/a&gt;)&lt;/small&gt; &lt;p&gt;Art is an abstract and subjective quality: It can be studied, but cannot be objectively measured, counted, weighed, or absolutely compared; it can only appeal to the viewers or audience's personal senses.</d:Description>
24
+ <narrow1 r:resource="Top/Arts/Directories"></narrow1>
25
+ <narrow1 r:resource="Top/Arts/News_and_Media"></narrow1>
26
+ <narrow1 r:resource="Top/Arts/Weblogs"></narrow1>
27
+ <narrow1 r:resource="Top/Arts/Chats_and_Forums"></narrow1>
28
+ <narrow2 r:resource="Top/Arts/Art_History"></narrow2>
29
+ <narrow2 r:resource="Top/Arts/Crafts"></narrow2>
30
+ <altlang r:resource="Tamil:Top/World/Tamil/கலை"></altlang>
31
+ <altlang r:resource="English:Top/World/O'zbekcha/San’at"></altlang>
32
+ <altlang r:resource="Euskara:Top/World/Euskara/Kultura"></altlang>
33
+ <altlang r:resource="Rumantsch:Top/World/Rumantsch/Art"></altlang>
34
+ <symbolic1 r:resource="Publishers:Top/Business/Publishing_and_Printing/Publishing/Books/Arts"></symbolic1>
35
+ <related r:resource="Top/Business/Arts_and_Entertainment"></related>
36
+ <symbolic2 r:resource="Native_and_Tribal:Top/Arts/Visual_Arts/Native_and_Tribal"></symbolic2>
37
+ <symbolic2 r:resource="Typography:Top/Arts/Graphic_Design/Typography"></symbolic2>
38
+ <related r:resource="Kids_and_Teens/Arts"></related>
39
+ </Topic>
40
+ <Alias r:id="Publishers:Top/Business/Publishing_and_Printing/Publishing/Books/Arts">
41
+ <d:Title>Publishers</d:Title>
42
+ <Target r:resource="Top/Business/Publishing_and_Printing/Publishing/Books/Arts"/>
43
+ </Alias>
44
+ <Alias r:id="Native_and_Tribal:Top/Arts/Visual_Arts/Native_and_Tribal">
45
+ <d:Title>Native_and_Tribal</d:Title>
46
+ <Target r:resource="Top/Arts/Visual_Arts/Native_and_Tribal"/>
47
+ </Alias>
48
+ </RDF>
@@ -0,0 +1,7 @@
1
+ require 'rspec'
2
+ require 'dmoz_sax'
3
+
4
+ RSpec.configure do |config|
5
+ config.color_enabled = true
6
+ config.formatter = 'documentation'
7
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
describe DmozSax::StructureDocument do
  #it 'can parse a real structure.rdf.u8 document' do
  #  parser = Nokogiri::XML::SAX::Parser.new(DmozSax::StructureDocument.new)
  #  parser.parse(File.open('/opt/data/DMOZ/structure.rdf.u8'))
  #end

  it 'can parse a sample structure.rdf.u8 document' do

    topics = []
    aliases = []

    document = DmozSax::StructureDocument.new
    document.on_topic = lambda { |t| topics << t }

    document.on_alias = lambda { |a| aliases << a }

    parser = Nokogiri::XML::SAX::Parser.new(document)
    # Block form closes the sample file once parsing is done.
    File.open('spec/samples/structure_sample.rdf.u8') { |io| parser.parse(io) }

    topics.count.should == 2
    topics[1].title.should == 'Arts'
    topics[1].path.should == ['Arts']
    topics[1].description.should include 'aesthetic objects'
    topics[1].cid.should == 381773
    # <lastUpdate> of the second sample topic.
    topics[1].time.should == Time.utc(2011, 8, 6, 17, 20, 44)

    aliases.count.should == 2
    aliases[0].title.should == 'Publishers'
    aliases[0].path.should == ['Business','Publishing and Printing','Publishing','Books','Arts']
  end
end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+
3
+ describe DmozSax::Topic do
4
+
5
+ end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+
3
describe DmozSax::VERSION do
  it "version number should be in a standard format" do
    # \A and \z anchor the whole string; ^ and $ would accept a valid first
    # line followed by arbitrary further lines.
    DmozSax::VERSION.should =~ /\A[\d]+[.][\d]+[.][\d]+([.-][a-z0-9]+)?\z/
  end
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dmoz_sax_doc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Galen Palmer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-02-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: json
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.7'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.7'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '2.12'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.12'
62
+ description: Use a SAX parser to visit either the structure.u8 or content.u8 DMOZ
63
+ files.
64
+ email:
65
+ - palmergs@gmail.com
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - Gemfile
71
+ - Gemfile.lock
72
+ - README.md
73
+ - Rakefile
74
+ - dmoz_sax_doc.gemspec
75
+ - lib/dmoz_sax.rb
76
+ - lib/dmoz_sax/alias.rb
77
+ - lib/dmoz_sax/content_document.rb
78
+ - lib/dmoz_sax/external_page.rb
79
+ - lib/dmoz_sax/name_parser.rb
80
+ - lib/dmoz_sax/path.rb
81
+ - lib/dmoz_sax/structure_document.rb
82
+ - lib/dmoz_sax/time_parser.rb
83
+ - lib/dmoz_sax/topic.rb
84
+ - lib/dmoz_sax/version.rb
85
+ - spec/alias_spec.rb
86
+ - spec/content_document_spec.rb
87
+ - spec/external_page_spec.rb
88
+ - spec/path_spec.rb
89
+ - spec/samples/content_sample.rdf.u8
90
+ - spec/samples/structure_sample.rdf.u8
91
+ - spec/spec_helper.rb
92
+ - spec/structure_document_spec.rb
93
+ - spec/topic_spec.rb
94
+ - spec/version_spec.rb
95
+ homepage: https://github.com/palmergs/dmoz_sax_doc
96
+ licenses: []
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ required_rubygems_version: !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 1.8.25
116
+ signing_key:
117
+ specification_version: 3
118
+ summary: SAX visitor for DMOZ structure or content files.
119
+ test_files:
120
+ - spec/alias_spec.rb
121
+ - spec/content_document_spec.rb
122
+ - spec/external_page_spec.rb
123
+ - spec/path_spec.rb
124
+ - spec/samples/content_sample.rdf.u8
125
+ - spec/samples/structure_sample.rdf.u8
126
+ - spec/spec_helper.rb
127
+ - spec/structure_document_spec.rb
128
+ - spec/topic_spec.rb
129
+ - spec/version_spec.rb