groff_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +18 -0
- data/groff_parser.gemspec +25 -0
- data/lib/groff_parser.rb +7 -0
- data/lib/groff_parser/document.rb +98 -0
- data/lib/groff_parser/engine.rb +113 -0
- data/lib/groff_parser/version.rb +3 -0
- data/test/document_test.rb +73 -0
- data/test/engine_test.rb +22 -0
- data/test/fixtures/git.1 +1245 -0
- data/test/fixtures/git.1.gz +0 -0
- data/test/test_helper.rb +29 -0
- metadata +122 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 406e4d48011a846f365a37621e4ba1d91a9e5e35
|
4
|
+
data.tar.gz: 02d97ad12f839e36b35ff9d7c82197e935e218c8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 464732dbc63162c610ce01ec69e533ecd48bd4ae59893d5f26f9b3b3b3e4f3dcaee209573e8ac5fbc5c4daff547dfbd2828e0454a80b1ef47ffe79db426c42fd
|
7
|
+
data.tar.gz: c80a4373eceaf28529e77d99323dc4fe308bc65518a2d18eb5551fee48482aa8f37aab212166513d22812444b1548d2c185b1998492be081c3d31f2bfdbcb3e6
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Roberto Dip
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# GroffParser
|
2
|
+
|
3
|
+
### Status
|
4
|
+
[](http://badge.fury.io/rb/groff_parser)
|
5
|
+
[](https://codeclimate.com/github/roperzh/groff_parser)
|
6
|
+
[](https://gemnasium.com/roperzh/groff_parser)
|
7
|
+
|
8
|
+
Tiny library to parse groff files, with some handy methods to manage directories containing many files
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'groff_parser'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install groff_parser
|
23
|
+
|
24
|
+
## Basic Usage
|
25
|
+
|
26
|
+
Coming soon; in the meantime you can read the [`docs`](http://rubydoc.info/gems/groff_parser)
|
27
|
+
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it ( http://github.com/roperzh/groff_parser/fork )
|
32
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
34
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
35
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rake'
require 'rake/testtask'
require 'bundler/gem_tasks'

# Default test task: picks up every *_test.rb file below test/ and puts the
# test/ directory itself on the load path.
Rake::TestTask.new do |test_task|
  test_task.libs << 'test'
  test_task.pattern = 'test/**/*_test.rb'
end

namespace :test do
  # Runs the regular test task after setting COVERAGE=true in the environment.
  task :coverage do
    ENV['COVERAGE'] = 'true'
    Rake::Task['test'].invoke
  end
end

# Running plain `rake` executes the test task.
task :default => :test
|
18
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for groff_parser. The version is read from
# lib/groff_parser/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'groff_parser/version'

Gem::Specification.new do |spec|
  spec.name          = "groff_parser"
  spec.version       = GroffParser::VERSION
  spec.authors       = ["Roberto Dip"]
  spec.email         = ["dip.jesusr@gmail.com"]
  spec.summary       = %q{Just a little gem to handle groff files}
  spec.description   = %q{Tiny library to parse groff files, with some handy methods to manage directories with a lot of files}
  spec.homepage      = "https://github.com/roperzh/groff_parser"
  spec.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest", "~> 5"
  spec.add_development_dependency "inch"
end
|
data/lib/groff_parser.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
module GroffParser

  # A class representing a specific document to be parsed. The document is
  # read through the external `cat` / `zcat` commands and rendered through
  # `groff`; nothing is executed until content is requested.

  class Document

    # Initializes the document class
    #
    # @since 0.1.0
    #
    # @example
    #   zipped_document   = GroffParser::Document.new("path/to/file.gz", zipped: true)
    #   unzipped_document = GroffParser::Document.new("path/to/file.1", zipped: false)
    #
    # @param path [String] the path where the document is located
    #
    # @param zipped [Boolean] indicates if the document is zipped or not
    #
    # @return [GroffParser::Document] a new instance of a Document class

    def initialize(path, zipped: false)
      @path   = path
      @zipped = zipped
    end

    # Currently in beta: searches the raw document for a section heading of
    # the form `SH "<name>"` and returns everything from that heading up to
    # the start of the next `SH`, with every literal "SH" removed.
    #
    # The name is matched literally (regexp metacharacters are escaped) and
    # case-insensitively. A section that is not followed by another `SH`
    # (for instance the last one in the file) is not matched.
    #
    # @since 0.1.0
    #
    # @example
    #   document.section("MY SECTION")
    #   # searches for a section like this one:
    #   # .SH "MY SECTION"
    #   # ...
    #
    # @param name [String, Symbol] name of the section
    #
    # @return [String, nil] the contents of the section or nil if the section
    #   doesn't exist

    def section(name)
      heading = Regexp.escape(name.to_s)
      found   = raw_content[/SH \"#{heading}\"(.*?)SH/im]

      # String#[] yields nil when nothing matches; pass that through instead
      # of calling gsub on it.
      found && found.gsub("SH", "")
    end

    # Raw content of the document, without being parsed, in pure
    # groff format. The result is memoized after the first read.
    #
    # @since 0.1.0
    #
    # @example
    #   document.raw_content
    #
    # @return [String] the document content in groff format

    def raw_content
      @raw_content ||= IO.popen(reader_command, &:read)
    end

    # Content of the document in a specific format
    #
    # @since 0.1.0
    #
    # @example
    #   document.formatted_content(:html)
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X100
    #
    # @return [String] the document content formatted in the requested format

    def formatted_content(format)
      # Equivalent to `<get> <path> | groff -mandoc -T<format>`, but both
      # commands are spawned from argument arrays so no shell ever interprets
      # the path or the format.
      IO.popen(reader_command) do |source|
        IO.popen(["groff", "-mandoc", "-T#{format}"], in: source, &:read)
      end
    end

    private

    # Little helper to know which command should be executed in order to read
    # files properly
    #
    # @since 0.1.0
    #
    # @example
    #   @zipped = true
    #   get # => "zcat"
    #
    #   @zipped = false
    #   get # => "cat"
    #
    # @return [String] the proper command name

    def get
      @zipped ? "zcat" : "cat"
    end

    # Argument vector used to dump the document to standard output. Passing an
    # array (rather than a string) to IO.popen bypasses the shell, so paths
    # containing spaces or shell metacharacters are handled verbatim.
    #
    # @return [Array<String>] command name followed by the document path

    def reader_command
      [get, @path.to_s]
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require "groff_parser/document"
|
2
|
+
|
3
|
+
module GroffParser

  # A handy class which offers some methods to handle multiple documents at
  # the same time

  class Engine

    # @since 0.1.0
    attr_accessor :path

    # Stores a path, this one can be a path to a directory or to a specific
    # file; using the proper instance methods for each case is up to you
    #
    # @since 0.1.0
    #
    # @example
    #   GroffParser::Engine.new(path: "path/to/file")
    #   GroffParser::Engine.new(path: "path/to/directory")
    #
    # @param path [String] the path to be stored (defaults to the current
    #   working directory)
    #
    # @return [GroffParser::Engine] a new Engine instance

    def initialize(path: Dir.pwd)
      @path = path
    end

    # Parse a document located on a given path. A bare document name (or any
    # relative path that does not already start with the engine path) is
    # looked up inside `path`; an absolute path, or one that already starts
    # with the engine path, is used as given.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "some/path")
    #   parser.parse("some/path/nested/another_file")
    #   # Uses the path as given
    #   parser.parse("some_file")
    #   # Will search for a file called some_file in some/path
    #
    # @param document_path [String] path for a document to parse
    #
    # @param zipped [Boolean] indicates if the file is zipped or not (gzip)
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X100
    #
    # @return [String] the content of the document, parsed in the proper format

    def parse(document_path, zipped = false, format: :utf8)
      document_for(document_path, zipped).formatted_content(format)
    end

    # Parse all documents in the engine directory, with a given format.
    # Unzipped documents are the entries whose name ends in a digit (man
    # section suffix); zipped documents are the entries ending in `.gz`.
    #
    # @since 0.1.0
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X100
    #   (default = utf8)
    #
    # @param zipped [Boolean] indicates if the files are zipped or not (gzip)
    #
    # @return [Array<String>] an array of all the parsed documents

    def parse_all(format: :utf8, zipped: false)
      Dir.glob(search_pattern(zipped)).map do |file|
        Document.new(file, zipped: zipped).formatted_content(format)
      end
    end

    # Executes a passed block, giving a GroffParser::Document as argument.
    #
    # Positional arguments are joined with a single space to form the document
    # path (resolved like in #parse); with no positional argument the engine
    # path itself is used. A trailing hash may carry `zipped: true`.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "/path/to/file")
    #   parser.apply { |document| document.formatted_content(:html) }
    #
    # @return [Object] whatever the block returns

    def apply(*args)
      options = args.last.is_a?(Hash) ? args.pop : {}
      target  = args.empty? ? path : args.join(" ")

      yield document_for(target, options[:zipped])
    end

    # Executes a passed block over all the documents in the current directory,
    # giving a GroffParser::Document as argument for each block execution.
    # Files are selected with the same rules as #parse_all.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "/folder/with/some/files")
    #   parser.apply_all(zipped: true) { |document| document.raw_content }
    #
    # @return [nil]

    def apply_all(*args)
      options = args.first.is_a?(Hash) ? args.first : {}
      zipped  = options[:zipped] ? true : false

      Dir.glob(search_pattern(zipped)) do |file|
        yield Document.new(file, zipped: zipped)
      end
    end

    private

    # Glob pattern selecting the documents of the engine directory.
    #
    # @return [String] "<path>/*.gz" for zipped files, "<path>/*[0-9]" otherwise

    def search_pattern(zipped)
      File.join(path.to_s, zipped ? "*.gz" : "*[0-9]")
    end

    # Builds the Document for a (possibly relative) document path.
    #
    # @return [GroffParser::Document]

    def document_for(document_path, zipped)
      Document.new(resolve(document_path), zipped: zipped ? true : false)
    end

    # Turns a document name into a usable path. A plain substring test is not
    # enough here ("human.1" contains "man"), so only an exact match, an
    # absolute path or a real directory prefix counts as already resolved.
    #
    # @return [String] the path to hand to Document

    def resolve(document_path)
      document = document_path.to_s
      base     = path.to_s
      prefix   = base.end_with?("/") ? base : "#{base}/"

      return document if document == base || document.start_with?(prefix, "/")

      File.join(base, document)
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require "test_helper"

# Specs for GroffParser::Document, run against the git.1 fixture in both its
# plain and gzip-compressed forms. Paths are relative to the project root.
describe GroffParser::Document do

  let(:zipped_document) {
    GroffParser::Document.new("test/fixtures/git.1.gz", zipped: true)
  }

  let(:unzipped_document) {
    GroffParser::Document.new("test/fixtures/git.1", zipped: false)
  }

  describe "#section" do
    it "returns the contents of a section delimited by a given title" do
      zipped_document.section("NAME").must_equal(
        " \"NAME\"\ngit \\- the stupid content tracker\n."
      )

      unzipped_document.section("NAME").must_equal(
        " \"NAME\"\ngit \\- the stupid content tracker\n."
      )
    end

    it "returns the same results whether the document is zipped or not" do
      zipped_document.section("NAME").must_equal(
        unzipped_document.section("NAME")
      )
    end
  end

  describe "#raw_content" do
    it "returns the content in groff format" do
      zipped_raw_content   = `zcat test/fixtures/git.1.gz`
      unzipped_raw_content = `cat test/fixtures/git.1`

      zipped_document.raw_content.must_equal zipped_raw_content
      unzipped_document.raw_content.must_equal unzipped_raw_content
    end

    it "returns the same results whether the document is zipped or not" do
      zipped_document.raw_content.must_equal(
        unzipped_document.raw_content
      )
    end
  end

  describe "#formatted_content" do
    # Matches the creation-date comment in the generated HTML; it is stripped
    # before comparing because it differs between two runs.
    let(:timestamp) { /<!-- CreationDate: (.*?) -->/ }

    it "returns the content in the requested format" do
      # Due to a small delay parsing data, we need to suppress the timestamps
      method_content_from_zip = zipped_document.formatted_content(:html)
        .gsub(timestamp, "")

      method_content_from_unzip = unzipped_document.formatted_content(:html)
        .gsub(timestamp, "")

      method_content_from_zip.must_equal(
        `zcat test/fixtures/git.1.gz | groff -mandoc -Thtml`.gsub(timestamp, "")
      )

      method_content_from_unzip.must_equal(
        `cat test/fixtures/git.1 | groff -mandoc -Thtml`.gsub(timestamp, "")
      )
    end

    it "returns the same results whether the document is zipped or not" do
      # Equality, not inclusion: both renderings must be identical once the
      # timestamp is removed.
      zipped_document.formatted_content(:html).gsub(timestamp, "").must_equal(
        unzipped_document.formatted_content(:html).gsub(timestamp, "")
      )
    end
  end
end
|
data/test/engine_test.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Resolve the helper relative to this file so loading it does not depend on
# the current working directory.
require File.expand_path("../test_helper", __FILE__)

# Specs for GroffParser::Engine, pointed at the fixtures directory. Fixture
# paths are relative to the project root.
describe GroffParser::Engine do

  let(:engine) { GroffParser::Engine.new(path: "test/fixtures") }

  describe "#parse" do
    it "returns the contents of a document, formatted by the requested format" do
      engine.parse("git.1.gz", true, format: :utf8).must_equal(
        `cat test/fixtures/git.1 | groff -mandoc -Tutf8`
      )
    end
  end

  describe "#parse_all" do
    it "returns an array with all the documents contents present in the path" do
      engine.parse_all.must_equal(
        [`cat test/fixtures/git.1 | groff -mandoc -Tutf8`]
      )
    end
  end
end
|