groff_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +18 -0
- data/groff_parser.gemspec +25 -0
- data/lib/groff_parser.rb +7 -0
- data/lib/groff_parser/document.rb +98 -0
- data/lib/groff_parser/engine.rb +113 -0
- data/lib/groff_parser/version.rb +3 -0
- data/test/document_test.rb +73 -0
- data/test/engine_test.rb +22 -0
- data/test/fixtures/git.1 +1245 -0
- data/test/fixtures/git.1.gz +0 -0
- data/test/test_helper.rb +29 -0
- metadata +122 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 406e4d48011a846f365a37621e4ba1d91a9e5e35
|
4
|
+
data.tar.gz: 02d97ad12f839e36b35ff9d7c82197e935e218c8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 464732dbc63162c610ce01ec69e533ecd48bd4ae59893d5f26f9b3b3b3e4f3dcaee209573e8ac5fbc5c4daff547dfbd2828e0454a80b1ef47ffe79db426c42fd
|
7
|
+
data.tar.gz: c80a4373eceaf28529e77d99323dc4fe308bc65518a2d18eb5551fee48482aa8f37aab212166513d22812444b1548d2c185b1998492be081c3d31f2bfdbcb3e6
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Roberto Dip
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# GroffParser
|
2
|
+
|
3
|
+
### Status
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/groff_parser.png)](http://badge.fury.io/rb/groff_parser)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/roperzh/groff_parser.png)](https://codeclimate.com/github/roperzh/groff_parser)
|
6
|
+
[![Dependencies](https://gemnasium.com/roperzh/groff_parser.png)](https://gemnasium.com/roperzh/groff_parser)
|
7
|
+
|
8
|
+
Tiny library to parse groff files, with some handy methods to manage directories with a lot of files
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'groff_parser'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install groff_parser
|
23
|
+
|
24
|
+
## Basic Usage
|
25
|
+
|
26
|
+
Coming soon; in the meantime you can read the [`docs`](http://rubydoc.info/gems/groff_parser)
|
27
|
+
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it ( http://github.com/roperzh/groff_parser/fork )
|
32
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
34
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
35
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
|
5
|
+
Rake::TestTask.new do |t|
|
6
|
+
t.pattern = 'test/**/*_test.rb'
|
7
|
+
t.libs.push 'test'
|
8
|
+
end
|
9
|
+
|
10
|
+
namespace :test do
|
11
|
+
task :coverage do
|
12
|
+
ENV['COVERAGE'] = 'true'
|
13
|
+
Rake::Task['test'].invoke
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
task default: :test
|
18
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8

# Gem specification for groff_parser.
#
# The version is read from lib/groff_parser/version.rb, so `lib` has to be on
# the load path before that file is required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'groff_parser/version'

Gem::Specification.new do |spec|
  spec.name          = "groff_parser"
  spec.version       = GroffParser::VERSION
  spec.authors       = ["Roberto Dip"]
  spec.email         = ["dip.jesusr@gmail.com"]
  spec.summary       = %q{Just a little gem to handle groff files}
  spec.description   = %q{Tiny library to parse groff files, with some handy methods to manage directories with a lot of files}
  spec.homepage      = "https://github.com/roperzh/groff_parser"
  spec.license       = "MIT"

  # The library relies on keyword arguments, which are a syntax error on
  # interpreters older than Ruby 2.0.
  spec.required_ruby_version = ">= 2.0.0"

  # Package every file tracked by git; executables and test files are derived
  # from that list by directory prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
  spec.add_development_dependency 'minitest', '~> 5'
  spec.add_development_dependency "inch"
end
|
data/lib/groff_parser.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
module GroffParser

  # A class representing a specific document to be parsed.
  #
  # The document is read by running `cat` (or `zcat` for gzipped files) and,
  # for formatted output, piping the result through `groff -mandoc`. Both
  # external commands are started without a shell, so paths containing spaces
  # or shell metacharacters are passed through verbatim.

  class Document

    # Initializes the document class
    #
    # @since 0.1.0
    #
    # @example
    #   zipped_document   = GroffParser::Document.new("path/to/file.gz", zipped: true)
    #   unzipped_document = GroffParser::Document.new("path/to/file.1", zipped: false)
    #
    # @param path [String] the path where the document is located
    #
    # @param zipped [Boolean] indicates if the document is zipped or not
    #
    # @return [GroffParser::Document] a new instance of a Document class

    def initialize(path, zipped: false)
      @path   = path
      @zipped = zipped
    end

    # Currently in beta. Given a section name, searches the current document
    # for a section heading with that (double-quoted) title and returns the
    # text between that heading and the next `.SH` heading, or the end of the
    # document when it is the last section.
    #
    # The title is matched case-insensitively and literally (regular
    # expression metacharacters in it have no special meaning).
    #
    # The returned string keeps the shape produced by earlier versions: it
    # starts right after the `SH` of the heading (a space followed by the
    # quoted title) and, when another heading follows, ends with the leading
    # "." of that heading.
    #
    # @since 0.1.0
    #
    # @example
    #   document.section("MY SECTION")
    #   # searches for a section like this one:
    #   # .SH "MY SECTION"
    #   # ...
    #
    # @param name [String, Symbol] name of the section
    #
    # @return [String, nil] the contents of the section or nil if the section
    #   doesn't exist

    def section(name)
      title = Regexp.escape(name.to_s)

      # The section ends at the next heading macro at the start of a line, not
      # at any occurrence of the letters "sh" inside the text.
      found = raw_content.match(/SH( "#{title}".*?)(^\.SH|\z)/im)
      return nil unless found

      found[2].empty? ? found[1] : "#{found[1]}."
    end

    # Raw content of the document, without being parsed, in pure
    # groff format. The result is memoized after the first read.
    #
    # @since 0.1.0
    #
    # @example
    #   document.raw_content
    #
    # @return [String] the document content in groff format

    def raw_content
      # Array form of IO.popen executes the command directly (no shell), so
      # the path is never re-interpreted.
      @raw_content ||= IO.popen([get, @path.to_s], &:read)
    end

    # Content of the document in a specific format
    #
    # @since 0.1.0
    #
    # @example
    #   document.formatted_content(:html)
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X75-12,
    #   X100, X100-12
    #
    # @return [String] the document content formatted in the requested format

    def formatted_content(format)
      # Equivalent of `<get> <path> | groff -mandoc -T<format>`, wired up
      # without a shell: the reader's output pipe becomes groff's stdin.
      IO.popen([get, @path.to_s]) do |source|
        IO.popen(["groff", "-mandoc", "-T#{format}"], in: source, &:read)
      end
    end

    private

    # Little helper to know which command should be executed in order to read
    # files properly
    #
    # @since 0.1.0
    #
    # @example
    #   @zipped = true
    #   get # => "zcat"
    #
    #   @zipped = false
    #   get # => "cat"
    #
    # @return [String] the name of the command used to read the file

    def get
      @zipped ? "zcat" : "cat"
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require "groff_parser/document"
|
2
|
+
|
3
|
+
module GroffParser

  # A handy class that offers some methods to handle multiple documents at
  # the same time

  class Engine

    # @since 0.1.0
    attr_accessor :path

    # Stores a path; it can be a path to a directory or to a specific file.
    # Using the proper instance methods for each case is up to you.
    #
    # @since 0.1.0
    #
    # @example
    #   GroffParser::Engine.new(path: "path/to/file")
    #   GroffParser::Engine.new(path: "path/to/directory")
    #
    # @param path [String] the path to be stored (defaults to the current
    #   working directory)
    #
    # @return [GroffParser::Engine] a new Engine instance

    def initialize(path: Dir.pwd)
      @path = path
    end

    # Parse a document located on a given path. A document path that is equal
    # to, or located under, the engine's `path` is used as given; any other
    # value is treated as relative to the engine's `path`.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "some/path")
    #   parser.parse("some_file")
    #   # Will search for a file called some_file in some/path
    #   parser.parse("some/path/some_file")
    #   # Same file, already prefixed with the engine path
    #   parser.parse("some_file.gz", true, format: :html)
    #
    # @param document_path [String] path for a document to parse
    #
    # @param zipped [Boolean] indicates if the file is zipped or not (gzip)
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X75-12,
    #   X100, X100-12
    #
    # @return [String] the content of the document, parsed in the proper format

    def parse(document_path, zipped = false, format: :utf8)
      Document.new(resolve(document_path), zipped: zipped).formatted_content(format)
    end

    # Parse all documents in the engine's directory, with a given format.
    # Gzipped documents are the files ending in `.gz`; plain documents are the
    # files whose name ends in a digit (e.g. `git.1`).
    #
    # @since 0.1.0
    #
    # @param format [Symbol, String] indicates the output format, could be:
    #   dvi, html, lbp, lj4, ps, ascii, cp1047, latin1, utf8, X75, X75-12,
    #   X100, X100-12 (default = utf8)
    #
    # @param zipped [Boolean] indicates if the files are zipped or not (gzip)
    #
    # @return [Array] an array of all the parsed documents

    def parse_all(format: :utf8, zipped: false)
      document_paths(zipped).map do |document|
        parse(document, zipped, format: format)
      end
    end

    # Executes a passed block, giving a GroffParser::Document as argument.
    #
    # Without arguments the document is the engine's own `path`; otherwise the
    # given name (several names are joined with a space) is resolved the same
    # way as in #parse. A trailing options hash may carry `zipped: true`.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "/path/to/file")
    #   parser.apply { |document| document.formatted_content(:utf8) }
    #
    #   parser = GroffParser::Engine.new(path: "/folder/with/some/files")
    #   parser.apply("git.1.gz", zipped: true) { |document| document.raw_content }
    #
    # @return [Object] whatever the block returns

    def apply(*args)
      options = extract_options(args)
      name    = args.join(" ")
      target  = name.empty? ? path : resolve(name)

      yield Document.new(target, zipped: options[:zipped] || false)
    end

    # Executes a passed block over all the documents in the current directory,
    # giving a GroffParser::Document as argument for each block execution.
    # The documents are selected the same way as in #parse_all.
    #
    # @since 0.1.0
    #
    # @example
    #   parser = GroffParser::Engine.new(path: "/folder/with/some/files")
    #   parser.apply_all { |document| document.raw_content }
    #   parser.apply_all(zipped: true) { |document| document.raw_content }
    #
    # @return [nil]

    def apply_all(*args)
      zipped = extract_options(args)[:zipped] || false

      document_paths(zipped).each do |document|
        yield Document.new(document, zipped: zipped)
      end

      nil
    end

    private

    # Removes and returns a trailing options hash from an argument list, or
    # returns an empty hash when there is none.
    def extract_options(args)
      args.last.is_a?(Hash) ? args.pop : {}
    end

    # Returns the document path to open: unchanged when it already is the
    # engine path or lives under it, joined to the engine path otherwise.
    def resolve(document_path)
      document_path = document_path.to_s
      prefix        = File.join(path, "") # engine path with a trailing separator

      if document_path == path || document_path.start_with?(prefix)
        document_path
      else
        File.join(path, document_path)
      end
    end

    # Lists the documents found directly inside the engine path.
    def document_paths(zipped)
      Dir.glob(File.join(path, zipped ? "*.gz" : "*[0-9]"))
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe GroffParser::Document do
|
4
|
+
|
5
|
+
let(:zipped_document) {
|
6
|
+
GroffParser::Document.new("test/fixtures/git.1.gz", zipped: true)
|
7
|
+
}
|
8
|
+
|
9
|
+
let(:unzipped_document) {
|
10
|
+
GroffParser::Document.new("test/fixtures/git.1", zipped: false)
|
11
|
+
}
|
12
|
+
|
13
|
+
describe "#section" do
|
14
|
+
it "returns the contents of a section delimited by a given title" do
|
15
|
+
zipped_document.section("NAME").must_equal(
|
16
|
+
" \"NAME\"\ngit \\- the stupid content tracker\n."
|
17
|
+
)
|
18
|
+
|
19
|
+
unzipped_document.section("NAME").must_equal(
|
20
|
+
" \"NAME\"\ngit \\- the stupid content tracker\n."
|
21
|
+
)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns the same results whether the document is zipped or not" do
|
25
|
+
zipped_document.section("NAME").must_equal(
|
26
|
+
unzipped_document.section("NAME")
|
27
|
+
)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "#raw_content" do
|
32
|
+
it "returns the content in groff format" do
|
33
|
+
zipped_raw_content = `zcat test/fixtures/git.1.gz`
|
34
|
+
unzipped_raw_content = `cat test/fixtures/git.1`
|
35
|
+
|
36
|
+
zipped_document.raw_content.must_equal zipped_raw_content
|
37
|
+
unzipped_document.raw_content.must_equal unzipped_raw_content
|
38
|
+
end
|
39
|
+
|
40
|
+
it "returns the same results whether the document is zipped or not" do
|
41
|
+
zipped_document.raw_content.must_equal(
|
42
|
+
unzipped_document.raw_content
|
43
|
+
)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#formatted_content" do
|
48
|
+
let(:timestamp) { Regexp.new(/\<\!\-\- CreationDate\: (.*?) \-\-\>/) }
|
49
|
+
|
50
|
+
it "returns the content in the requested format" do
|
51
|
+
# Due to a small delay parsing data, we need to supress the timestamps
|
52
|
+
method_content_from_zip = zipped_document.formatted_content(:html)
|
53
|
+
.gsub(timestamp, "")
|
54
|
+
|
55
|
+
method_content_from_unzip = unzipped_document.formatted_content(:html)
|
56
|
+
.gsub(timestamp, "")
|
57
|
+
|
58
|
+
method_content_from_zip.must_equal(
|
59
|
+
`zcat test/fixtures/git.1.gz | groff -mandoc -Thtml`.gsub(timestamp, "")
|
60
|
+
)
|
61
|
+
|
62
|
+
method_content_from_unzip.must_equal(
|
63
|
+
`cat test/fixtures/git.1 | groff -mandoc -Thtml`.gsub(timestamp, "")
|
64
|
+
)
|
65
|
+
end
|
66
|
+
|
67
|
+
it "returns the same results whether the document is zipped or not" do
|
68
|
+
zipped_document.formatted_content(:html).gsub(timestamp, "").must_include(
|
69
|
+
unzipped_document.formatted_content(:html).gsub(timestamp, "")
|
70
|
+
)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/test/engine_test.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.expand_path(File.join("test/test_helper"))
|
2
|
+
|
3
|
+
describe GroffParser::Engine do
|
4
|
+
|
5
|
+
let(:engine) { GroffParser::Engine.new(path: "test/fixtures") }
|
6
|
+
|
7
|
+
describe "#parse" do
|
8
|
+
it "returns the contents of a document, formatted by the requested format" do
|
9
|
+
engine.parse("git.1.gz", true, format: :utf8).must_equal(
|
10
|
+
`cat test/fixtures/git.1 | groff -mandoc -Tutf8`
|
11
|
+
)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
describe "#parse_all" do
|
16
|
+
it "returns an array with all the documents contents present in the path" do
|
17
|
+
engine.parse_all.must_equal(
|
18
|
+
[`cat test/fixtures/git.1 | groff -mandoc -Tutf8`]
|
19
|
+
)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|