docparser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 829c9585c7ac42c2496a8b385f2f19d4486a5e10
4
+ data.tar.gz: 39f576740941b72e61babec808af6756adfeb1b8
5
+ SHA512:
6
+ metadata.gz: 32a9dc03bb9f413641b25cb0e51c9951fa8e1a64a48f4b1c8cc8e1e3877cbaa87b1ac70738f85c20f7383a3446d5a37915ee53a09fd30099b00be553779f4f4a
7
+ data.tar.gz: b106a3fc8843a2d47f9d8c4f5ac65e39afc702fff8dcabfdfe89e0477731ac1c9576e9b795dcda53c927c8acc718846b2bb277a0e7e68654ead52cb97e612b3e
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ hackaday.*
data/.yardops ADDED
@@ -0,0 +1,2 @@
1
+ README.md
2
+ LICENSE
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ ruby '2.0.0'
2
+
3
+ source 'https://rubygems.org'
4
+ gem 'nokogiri'
5
+ gem 'parallel'
6
+ gem 'axlsx'
7
+ gem 'terminal-table'
8
+ gem 'pageme'
9
+ gem "json"
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Jurriaan Pruis
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # DocParser
2
+
3
+ Docs: http://rubydoc.info/github/jurriaan/docparser/
4
+
5
+ DocParser is a web scraping/screen scraping tool.
6
+ You can use it to easily scrape web sites.
7
+
8
+ ## Features
9
+
10
+ - XPath and CSS support through Nokogiri
11
+ - Support for loading of URLs through open-uri
12
+ - Support for parallel processing of the documents
13
+ - 5 Output formats:
14
+ * CSV
15
+ * XLSX
16
+ * HTML
17
+ * YAML
18
+ * Screen (for debugging and development)
19
+
20
+ ## Installation
21
+
22
+ Add this line to your application's Gemfile:
23
+
24
+ gem 'docparser'
25
+
26
+ And then execute:
27
+
28
+ $ bundle
29
+
30
+ Or install it yourself as:
31
+
32
+ $ gem install docparser
33
+
34
+ ## Usage
35
+
36
+ See example.rb
37
+
38
+ ## Todo
39
+
40
+ - Tests
41
+
42
+ ## Contributing
43
+
44
+ 1. Fork it
45
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
46
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
47
+ 4. Push to the branch (`git push origin my-new-feature`)
48
+ 5. Create new Pull Request
49
+
50
+ ## Contributors
51
+
52
+ - Jurriaan Pruis
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/docparser.gemspec ADDED
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for docparser.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'docparser/version'

Gem::Specification.new do |spec|
  spec.name          = 'docparser'
  spec.version       = DocParser::VERSION
  spec.authors       = ['Jurriaan Pruis']
  spec.email         = ['email@jurriaanpruis.nl']
  spec.description   = %q{DocParser is a Ruby Gem for webscraping}
  spec.summary       = %q{DocParser is a Ruby Gem for webscraping}
  spec.homepage      = 'https://github.com/jurriaan/docparser'
  spec.license       = 'MIT'

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # The library uses keyword arguments and __dir__.
  spec.required_ruby_version = '>= 2.0.0'

  # Gems required at load time by the files under lib/. Without these a plain
  # `gem install docparser` produces a gem that cannot be required.
  %w[nokogiri parallel axlsx terminal-table pageme].each do |name|
    spec.add_runtime_dependency name
  end

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
data/example.rb ADDED
@@ -0,0 +1,22 @@
1
#
# Example: scrape the first 20 listing pages of hackaday.com into an HTML table.
# (C) 2013 Jurriaan Pruis
#

require 'docparser'
include DocParser

# The output file and its column titles
output = HTMLOutput.new(filename: 'hackaday.html')
output.header = 'Title', 'Author', 'Publication date', 'URL', 'Summary'

# One URL per listing page
pages = (1..20).map { |number| "http://hackaday.com/page/#{number}/" }

parser = Parser.new(files: pages, parallel: false, output: output)
parser.parse! do
  # Every post on a page becomes one row
  css('#content .post') do |post|
    link      = post.search('.entry-title a').first
    title     = link.content
    author    = post.search('.post-info .author .fn a').first.content
    published = post.search('.post-info .date.published').first.content
    url       = link.attributes['href']
    summary   = post.search('.entry-content').first.content.strip
    add_row title, author, published, url, summary
  end
end
data/lib/docparser.rb ADDED
@@ -0,0 +1 @@
1
+ require 'docparser/parser'
@@ -0,0 +1,79 @@
1
+ require 'set'
2
module DocParser
  # The Document class loads a single file or URL, parses it with Nokogiri and
  # collects the rows added by the user supplied parse block.
  # @see Parser
  # @see Output
  class Document
    attr_reader :filename, :doc, :encoding, :results

    # Loads and parses a document.
    # @param filename [String] path or URL of the document
    # @param encoding [String] encoding of the source; it is transcoded to
    #   utf-8 while reading
    # @param parser [Parser] the owning parser; one result list is kept for
    #   each of its outputs
    def initialize(filename, encoding: 'utf-8', parser: nil)
      mode = encoding == 'utf-8' ? 'r:utf-8' : "r:#{encoding}:utf-8"
      read_source(filename, mode) { |io| @doc = Nokogiri::HTML(io) }

      @encoding = encoding
      @parser = parser
      @filename = filename
      @results = Array.new(@parser.outputs.length) { [] }
    end

    # Adds a row to an output
    # @param row [Array] the cells of the row (nested arrays are flattened)
    # @param output [Integer, Output] index of the output, or the output itself
    # @raise [ArgumentError] if the output is not one of the parser's outputs
    def add_row(*row, output: 0)
      index = output.is_a?(Output) ? @parser.outputs.index(output) : output
      target = index && results[index]
      raise ArgumentError, "Unknown output: #{output.inspect}" if target.nil?
      target << row.flatten
    end

    # Extracts the document title
    # @return [String] the title of the document
    def title
      @title ||= xpath_content('//head/title')
    end

    # @return [String] the source of the document
    def html
      @html ||= @doc.inner_html # TODO: ??
    end

    # Executes a xpath query
    # @yield [element] every matching element, when a block is given
    # @return the matching nodes, with or without a block
    def xpath(query)
      nodes = @doc.search(query)
      nodes.each { |el| yield el } if block_given?
      nodes
    end

    # Executes a xpath query and returns the content
    # @return [String, nil] the content of the first matching node, or nil
    #   when nothing matches
    def xpath_content(query)
      node = @doc.search(query).first
      node && node.content
    end

    # Matches the HTML source using a regular expression
    # @return [MatchData, nil] nil when there is no match or matching fails
    def regexp(regexp)
      html.match(regexp)
    rescue StandardError
      # Best effort by design: a failed match is reported as "no match"
      nil
    end

    # Parses the document by evaluating the block in the context of this
    # document, so `css`, `xpath`, `add_row`, ... can be called directly.
    # @return [Array] containing the parse results
    def parse!(&block)
      instance_exec(&block)
      results
    end

    # @!visibility private
    def inspect
      "<Document file:'#{@filename}'>"
    end

    alias :css :xpath
    alias :css_content :xpath_content

    private

    # Opens a path or URL for reading and yields the IO.
    # Kernel#open stopped handling URLs in Ruby 3.0, so open-uri's URI.open is
    # preferred when it is available; it still falls back to plain files.
    def read_source(filename, mode, &block)
      if defined?(URI) && URI.respond_to?(:open)
        URI.open(filename, mode, &block)
      else
        open(filename, mode, &block)
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module DocParser
  # The Output base class.
  # All Output classes inherit from this one and override the hooks
  # (`open_file`, `header`, `write_row`, `footer`) they need.
  class Output
    attr_reader :rowcount

    # Creates a new output and opens its file for writing
    # @param filename [String] Output filename
    # @raise [ArgumentError] if no (or an empty) filename is given
    def initialize(filename: nil)
      @rowcount = 0
      @filename = filename
      raise ArgumentError, 'Please specify a filename' if filename.to_s.empty?
      # File.open instead of Kernel#open: a name starting with '|' must never
      # be executed as a command.
      @file = File.open(filename, 'w')
      open_file
    end

    # Stores the header and runs the `header` hook
    def header=(row)
      @header = row
      header
    end

    # Adds a row: counts it and hands it to `write_row`
    def add_row(row)
      @rowcount += 1
      write_row row
    end

    # Closes output and IO; the `footer` hook runs first
    def close
      footer
      @file.close unless @file.closed?
    end

    # Called after the file is opened
    def open_file
      # do nothing
    end

    # Called after header is set
    def header
      # do nothing
    end

    # Called when a row is added; subclasses must override this
    def write_row(row)
      raise 'No row writer defined'
    end

    # Called before closing the file
    def footer
      # do nothing
    end

    # Displays information about the output
    # @return [String] containing number of rows and file size
    def summary
      format("%s:\t%d rows, %9.2f KiB",
             @filename, @rowcount, File.size(@filename) / 1024.0)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'csv'
2
module DocParser
  # Output that writes all rows to a CSV file, using ';' as column separator
  # @see Output
  class CSVOutput < Output
    # @!visibility private
    # Wraps the already opened output file in a CSV writer
    def open_file
      @csv = CSV.new(@file, col_sep: ';')
    end

    # Writes the stored header as a regular CSV line
    def header
      write_row(@header)
    end

    # Appends one row to the CSV stream
    def write_row(row)
      @csv.add_row(row)
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'cgi'
2
module DocParser
  # The HTMLOutput class generates an HTML file containing a table
  # @see Output
  class HTMLOutput < Output
    # @!visibility private
    # Document prologue; `#FILENAME#` is replaced by the output filename.
    HTMLHEADER = <<-EOS
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>HTML output "#FILENAME#"</title>
<style type="text/css">
body {
font-family:"Helvetica Neue", Helvetica, Sans-Serif;
font-size:12px;
}
table {
border:1px solid #69c;
border-collapse:collapse;
font-size:12px;
text-align:left;
width:480px;
}
th {
border-bottom:1px dashed #69c;
color:#039;
font-size:14px;
font-weight:normal;
padding:12px 17px;
}
td {
color:#669;
padding:7px 17px;
white-space: pre;
}
tbody tr:hover td {
background:#d0dafd;
color:#339;
}
tbody tr:nth-child(even) {
background:#e0eaff;
}
</style>
</head>
<body>
<table>
    EOS
    # @!visibility private
    # Document epilogue; `#COUNT#` is replaced by the number of rows.
    HTMLFOOTER = <<-EOS
</tbody>
</table>
<p>#COUNT# rows</p>
</body>
</html>
    EOS

    # Writes the document prologue
    def open_file
      @body_open = false
      title = CGI.escapeHTML(@filename.to_s)
      # Block form: a string replacement would interpret backslashes
      @file << HTMLHEADER.gsub('#FILENAME#') { title }
    end

    # Writes the table head and opens the table body
    def header
      cells = @header.map { |f| "<th>#{CGI.escapeHTML(f.to_s)}</th>" }.join
      @file << '<thead><tr>' << cells << "</tr></thead>\n"
      open_body
    end

    # Writes one table row; every cell is HTML-escaped
    def write_row(row)
      open_body
      cells = row.map { |f| "<td>#{CGI.escapeHTML(f.to_s)}</td>" }.join
      @file << '<tr>' << cells << "</tr>\n"
    end

    # Closes the table and writes the row count
    def footer
      open_body
      @file << HTMLFOOTER.gsub('#COUNT#') { @rowcount.to_s }
    end

    private

    # Emits `<tbody>` exactly once, so the `</tbody>` in the footer always has
    # a matching opening tag, even when no header was set.
    def open_body
      return if @body_open
      @file << "<tbody>\n"
      @body_open = true
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'json'
2
module DocParser
  # The JSONOutput class generates a JSON file containing one array; every row
  # is an object in that array, keyed by the header fields
  # @see Output
  class JSONOutput < Output
    # @!visibility private
    # Starts the JSON array
    def open_file
      @file << '['
      @first = true
    end

    # Writes one row as a JSON object, comma separated from the previous one
    def write_row(row)
      @file << ',' unless @first
      @first = false
      # A fresh hash per row: cells missing from a short row become null
      @file << JSON.dump(Hash[@header.zip(row)])
    end

    # Terminates the JSON array. Implemented as the footer hook (instead of
    # overriding `close`) so that Output#close still closes the file.
    def footer
      @file << ']'
    end
  end
end
@@ -0,0 +1,50 @@
1
module DocParser
  # The MultiOutput output combines multiple outputs.
  # It creates a CSV, HTML, YAML, XLSX and JSON Output file; each file is
  # named after the given filename plus the extension of its format
  # @see CSVOutput
  # @see HTMLOutput
  # @see YAMLOutput
  # @see XLSXOutput
  # @see JSONOutput
  # @see Output
  class MultiOutput < Output
    # @!visibility private
    # @param options [Hash] options passed on to every output; `:filename` is
    #   used as base name
    # @raise [ArgumentError] if no filename is given
    def initialize(**options)
      basename = options[:filename]
      raise ArgumentError, 'Please specify a filename' if basename.nil?

      @outputs = output_types.map do |extension, klass|
        klass.new(**options.merge(filename: basename + extension))
      end
    end

    # Passes the header on to every output
    def header=(row)
      @outputs.each { |out| out.header = row.flatten }
    end

    # Passes the row on to every output
    def add_row(row)
      @outputs.each { |out| out.add_row row.flatten }
    end

    # @return [Integer] the lowest row count of all outputs
    def rowcount
      @outputs.map(&:rowcount).min
    end

    # Closes every output
    def close
      @outputs.each(&:close)
    end

    # @return [String] the summaries of all outputs, one per line
    def summary
      @outputs.map(&:summary).join("\n")
    end

    private

    # File extension and output class of every generated file. Resolved
    # lazily so the output classes only have to be loaded at construction.
    def output_types
      [
        ['.csv',  CSVOutput],
        ['.html', HTMLOutput],
        ['.yml',  YAMLOutput],
        ['.xlsx', XLSXOutput],
        ['.json', JSONOutput]
      ]
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'terminal-table'
2
+ require 'pageme'
3
module DocParser
  # This Output can be used for debugging purposes.
  # It pipes all rows through a pager
  # @see Output
  class ScreenOutput < Output
    # @!visibility private

    include PageMe

    # Does not call super: this output collects tables in memory and has no
    # file to open.
    def initialize
      @tables = []
      @rowcount = 0
    end

    # Shows all collected rows in the pager
    def close
      page do |p|
        p.puts "Showing all #{@tables.length} rows:\n\n"
        @tables.each do |table|
          p.puts table
        end
      end
    end

    # Stores the row as a two column table (field name, value)
    def write_row(row)
      # Without a header the columns are simply numbered
      labels = @header || (1..row.length).map { |i| "Column #{i}" }
      pairs = labels.each_with_index.map { |label, index| [label, row[index]] }
      @tables << Terminal::Table.new(rows: pairs)
    end

    # Displays information about the output. There is no file behind this
    # output, so only the row count is reported.
    # @return [String] containing number of rows
    def summary
      format("Screen:\t%d rows", @rowcount)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'axlsx'
2
module DocParser
  # The XLSXOutput class generates Microsoft Excel compatible .xlsx files
  # using the great axlsx library
  # @see Output
  class XLSXOutput < Output
    # @!visibility private
    # Prepares the workbook. The file handle opened by the base class is
    # closed right away: the package is serialized to the filename on close.
    def open_file
      @package = Axlsx::Package.new
      @package.workbook.date1904 = false # Fix for OS X
      @sheet = @package.workbook.add_worksheet
      @file.close
    end

    # Writes the header as the first row of the sheet
    def header
      write_row @header
    end

    # Appends one row to the sheet
    def write_row(row)
      @sheet.add_row row
    end

    # Wraps all cells in a table named 'Data' and writes the workbook
    def close
      last_cell = @sheet.cells.last
      # An empty sheet has no last cell and therefore no table range
      @sheet.add_table "A1:#{last_cell.r}", name: 'Data' if last_cell
      @package.serialize @filename
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'yaml'
2
module DocParser
  # The YAMLOutput class generates a YAML file containing all rows as separate
  # YAML documents
  # @see Output
  class YAMLOutput < Output
    # @!visibility private
    # Dumps one row as a YAML document that maps header fields to cells
    def write_row(row)
      # A fresh hash per row, so no value of a previous row can leak through
      YAML.dump(Hash[@header.zip(row)], @file)
    end
  end
end
@@ -0,0 +1,105 @@
1
+ $:.unshift __dir__
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'version'
5
+ require 'output'
6
+ require 'document'
7
+ require 'nokogiri'
8
+ require 'open-uri'
9
+ require 'parallel'
10
+ require 'set'
11
+ require 'output/screen_output.rb'
12
+ require 'output/csv_output.rb'
13
+ require 'output/html_output.rb'
14
+ require 'output/xlsx_output.rb'
15
+ require 'output/yaml_output.rb'
16
+ require 'output/json_output.rb'
17
+ require 'output/multi_output.rb'
18
+ # {include:file:README.md}
19
module DocParser
  # The main parser class. This is the class you'll use to create your parser
  # The real work happens in the Document class
  # @see Document
  class Parser
    # @!visibility private
    attr_reader :outputs

    # Creates a new parser instance
    # @param files [Array] An array containing URLs or paths to files
    # @param quiet [Boolean] Be quiet
    # @param encoding [String] The encoding to use for opening the files
    # @param parallel [Boolean] Use parallel processing
    # @param output [Output, Array] The output(s), defaults to a Screenoutput
    # @param range [Range] Range of files to process (nil means process all)
    # @param num_processes [Fixnum] Number of parallel processes
    # @raise [ArgumentError] if output is neither an Output nor an Array of
    #   Outputs
    def initialize(files: [], quiet: false, encoding: 'utf-8', parallel: true,
                   output: ScreenOutput.new, range: nil,
                   num_processes: Parallel.processor_count + 1)
      @quiet = quiet
      @parallel = parallel
      @num_processes = num_processes
      @encoding = encoding
      @outputs = normalize_outputs(output)
      # Array#[] returns nil for a range that starts beyond the last file
      @files = range ? (files[range] || []) : files
      log 'DocParser loaded..'
      log "#{@files.length} files loaded (encoding: #{@encoding})"
    end

    #
    # Parses the `files`: the block is evaluated for every document, the rows
    # it adds are written to the outputs and the outputs are closed.
    #
    def parse!(&block)
      log "Parsing #{@files.length} files."
      start_time = Time.now
      resultsets = collect_results(&block)

      log "\nSummary\n======="
      write_results(resultsets)

      log ''
      log 'Done processing in %.2fs.' % (Time.now - start_time)
    end

    private

    # Wraps a single Output in an Array and validates an Array of Outputs
    def normalize_outputs(output)
      return [output] if output.is_a?(Output)
      return output if output.is_a?(Array) && output.all? { |o| o.is_a?(Output) }
      raise ArgumentError, 'No outputs specified'
    end

    # Parses all files and merges their rows into one Set per output
    def collect_results(&block)
      resultsets = Array.new(@outputs.length) { Set.new }
      merge = lambda do |result|
        result.each_with_index { |rows, index| resultsets[index].merge(rows) }
      end

      if @parallel && @num_processes > 1
        log "Starting #{@num_processes} processes"
        Parallel.map(@files, in_processes: @num_processes) do |file|
          Document.new(file, encoding: @encoding, parser: self).parse!(&block)
        end.each(&merge)
        log 'Parallel processing finished, writing results..'
      else
        @files.each do |file|
          doc = Document.new(file, encoding: @encoding, parser: self)
          merge.call(doc.parse!(&block))
        end
      end
      resultsets
    end

    # Writes every result set to its output, then closes and summarizes it
    def write_results(resultsets)
      @outputs.each_with_index do |output, index|
        resultsets[index].each { |row| output.add_row row }
        resultsets[index] = nil # drop the reference to the written rows
        output.close
        log output.summary
      end
    end

    # Prints a message unless the parser is quiet
    def log(str)
      puts str unless @quiet
    end
  end
end
@@ -0,0 +1,4 @@
1
+ module DocParser
2
+ # The current version of DocParser
3
+ VERSION = '0.0.1'
4
+ end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docparser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Jurriaan Pruis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: DocParser is a Ruby Gem for webscraping
42
+ email:
43
+ - email@jurriaanpruis.nl
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .yardops
50
+ - Gemfile
51
+ - LICENSE
52
+ - README.md
53
+ - Rakefile
54
+ - docparser.gemspec
55
+ - example.rb
56
+ - lib/docparser.rb
57
+ - lib/docparser/document.rb
58
+ - lib/docparser/output.rb
59
+ - lib/docparser/output/csv_output.rb
60
+ - lib/docparser/output/html_output.rb
61
+ - lib/docparser/output/json_output.rb
62
+ - lib/docparser/output/multi_output.rb
63
+ - lib/docparser/output/screen_output.rb
64
+ - lib/docparser/output/xlsx_output.rb
65
+ - lib/docparser/output/yaml_output.rb
66
+ - lib/docparser/parser.rb
67
+ - lib/docparser/version.rb
68
+ homepage: https://github.com/jurriaan/docparser
69
+ licenses:
70
+ - MIT
71
+ metadata: {}
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 2.0.3
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: DocParser is a Ruby Gem for webscraping
92
+ test_files: []
93
+ has_rdoc: