comment_extractor 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +132 -0
- data/bin/comment_parser_debug +45 -0
- data/lib/comment_extractor/code_object/comment.rb +19 -0
- data/lib/comment_extractor/code_object.rb +12 -0
- data/lib/comment_extractor/code_objects.rb +46 -0
- data/lib/comment_extractor/configuration.rb +50 -0
- data/lib/comment_extractor/encoding.rb +40 -0
- data/lib/comment_extractor/extractor/c.rb +8 -0
- data/lib/comment_extractor/extractor/cc.rb +8 -0
- data/lib/comment_extractor/extractor/class.rb +8 -0
- data/lib/comment_extractor/extractor/clojure.rb +11 -0
- data/lib/comment_extractor/extractor/coffee.rb +13 -0
- data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
- data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
- data/lib/comment_extractor/extractor/cpp.rb +8 -0
- data/lib/comment_extractor/extractor/cs.rb +8 -0
- data/lib/comment_extractor/extractor/css.rb +8 -0
- data/lib/comment_extractor/extractor/cxx.rb +8 -0
- data/lib/comment_extractor/extractor/d.rb +9 -0
- data/lib/comment_extractor/extractor/erlang.rb +12 -0
- data/lib/comment_extractor/extractor/fortran.rb +11 -0
- data/lib/comment_extractor/extractor/go.rb +8 -0
- data/lib/comment_extractor/extractor/h.rb +8 -0
- data/lib/comment_extractor/extractor/haml.rb +49 -0
- data/lib/comment_extractor/extractor/haskell.rb +12 -0
- data/lib/comment_extractor/extractor/hpp.rb +8 -0
- data/lib/comment_extractor/extractor/html.rb +13 -0
- data/lib/comment_extractor/extractor/java.rb +8 -0
- data/lib/comment_extractor/extractor/java_script.rb +12 -0
- data/lib/comment_extractor/extractor/lisp.rb +11 -0
- data/lib/comment_extractor/extractor/lua.rb +12 -0
- data/lib/comment_extractor/extractor/m.rb +9 -0
- data/lib/comment_extractor/extractor/markdown.rb +7 -0
- data/lib/comment_extractor/extractor/mm.rb +8 -0
- data/lib/comment_extractor/extractor/perl.rb +12 -0
- data/lib/comment_extractor/extractor/php.rb +8 -0
- data/lib/comment_extractor/extractor/python.rb +13 -0
- data/lib/comment_extractor/extractor/ruby.rb +40 -0
- data/lib/comment_extractor/extractor/sass.rb +8 -0
- data/lib/comment_extractor/extractor/scala.rb +8 -0
- data/lib/comment_extractor/extractor/scss.rb +8 -0
- data/lib/comment_extractor/extractor/shell.rb +11 -0
- data/lib/comment_extractor/extractor/sqf.rb +8 -0
- data/lib/comment_extractor/extractor/sql.rb +12 -0
- data/lib/comment_extractor/extractor/sqs.rb +7 -0
- data/lib/comment_extractor/extractor/tex.rb +12 -0
- data/lib/comment_extractor/extractor/text.rb +10 -0
- data/lib/comment_extractor/extractor/yaml.rb +12 -0
- data/lib/comment_extractor/extractor.rb +96 -0
- data/lib/comment_extractor/extractor_manager.rb +158 -0
- data/lib/comment_extractor/file.rb +42 -0
- data/lib/comment_extractor/parser.rb +33 -0
- data/lib/comment_extractor/smart_string_scanner.rb +11 -0
- data/lib/comment_extractor/version.rb +4 -0
- data/lib/comment_extractor.rb +18 -0
- data/spec/assets/binary_file +0 -0
- data/spec/assets/shebang_file +3 -0
- data/spec/assets/source_code/c.c +158 -0
- data/spec/assets/source_code/cc.cc +24 -0
- data/spec/assets/source_code/class +0 -0
- data/spec/assets/source_code/clojure.clj +41 -0
- data/spec/assets/source_code/coffee.coffee +27 -0
- data/spec/assets/source_code/cpp.cpp +130 -0
- data/spec/assets/source_code/cs.cs +53 -0
- data/spec/assets/source_code/css.css +37 -0
- data/spec/assets/source_code/cxx +0 -0
- data/spec/assets/source_code/d.d +110 -0
- data/spec/assets/source_code/erlang.es +34 -0
- data/spec/assets/source_code/fortran.f +41 -0
- data/spec/assets/source_code/golang.go +61 -0
- data/spec/assets/source_code/h +0 -0
- data/spec/assets/source_code/haml.haml +26 -0
- data/spec/assets/source_code/haskell.hs +36 -0
- data/spec/assets/source_code/hpp +0 -0
- data/spec/assets/source_code/html.html +139 -0
- data/spec/assets/source_code/java.java +39 -0
- data/spec/assets/source_code/java_script.js +164 -0
- data/spec/assets/source_code/lisp.el +18 -0
- data/spec/assets/source_code/lua.lua +34 -0
- data/spec/assets/source_code/m +0 -0
- data/spec/assets/source_code/mm +0 -0
- data/spec/assets/source_code/perl.pl +36 -0
- data/spec/assets/source_code/php.php +31 -0
- data/spec/assets/source_code/python.py +139 -0
- data/spec/assets/source_code/ruby.rb +36 -0
- data/spec/assets/source_code/sass.sass +77 -0
- data/spec/assets/source_code/scala.scala +46 -0
- data/spec/assets/source_code/scss.scss +93 -0
- data/spec/assets/source_code/shell.sh +5 -0
- data/spec/assets/source_code/sqf +0 -0
- data/spec/assets/source_code/sql.sql +11 -0
- data/spec/assets/source_code/sqs +0 -0
- data/spec/assets/source_code/tex.tex +20 -0
- data/spec/assets/source_code/text.txt +15 -0
- data/spec/assets/source_code/vim +17 -0
- data/spec/assets/source_code/yaml.yml +44 -0
- data/spec/assets/stripper/children/children +0 -0
- data/spec/assets/stripper/children/children.c +0 -0
- data/spec/assets/stripper/children/children.js +0 -0
- data/spec/assets/stripper/children/children.o +0 -0
- data/spec/assets/stripper/children/children.rb +1 -0
- data/spec/assets/stripper/test +0 -0
- data/spec/assets/stripper/test.c +0 -0
- data/spec/assets/stripper/test.js +0 -0
- data/spec/assets/stripper/test.o +0 -0
- data/spec/assets/stripper/test.rb +1 -0
- data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
- data/spec/comment_extractor/code_object_spec.rb +18 -0
- data/spec/comment_extractor/code_objects_spec.rb +66 -0
- data/spec/comment_extractor/configuration_spec.rb +68 -0
- data/spec/comment_extractor/encoding_spec.rb +77 -0
- data/spec/comment_extractor/extractor/c_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
- data/spec/comment_extractor/extractor/class_spec.rb +9 -0
- data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
- data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/css_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
- data/spec/comment_extractor/extractor/d_spec.rb +10 -0
- data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
- data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
- data/spec/comment_extractor/extractor/go_spec.rb +9 -0
- data/spec/comment_extractor/extractor/h_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/html_spec.rb +9 -0
- data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
- data/spec/comment_extractor/extractor/java_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
- data/spec/comment_extractor/extractor/m_spec.rb +9 -0
- data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
- data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
- data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
- data/spec/comment_extractor/extractor/php_spec.rb +9 -0
- data/spec/comment_extractor/extractor/python_spec.rb +9 -0
- data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
- data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
- data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
- data/spec/comment_extractor/extractor/text_spec.rb +7 -0
- data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
- data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
- data/spec/comment_extractor/extractor_spec.rb +102 -0
- data/spec/comment_extractor/file_spec.rb +100 -0
- data/spec/comment_extractor/parser_spec.rb +67 -0
- data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
- data/spec/comment_extractor/version_spec.rb +8 -0
- data/spec/comment_extractor_spec.rb +15 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
- data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
- data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
- data/spec/support/rspec/comment_extractor.rb +6 -0
- metadata +370 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 81a66d3881e6e790f7b4fbd5626661f6b6b7ebce
|
4
|
+
data.tar.gz: eef85195b570687ca4e6eafbbe841de336884233
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 918fd7799b29841b8d1ca3a23329ba8c59f48635806dfb29b68e993ec85f20150fe33170db4eb3c228b2d7326503ac76726fca9bbe06f364eac489e81c84d0d2
|
7
|
+
data.tar.gz: ee4337e67ac7c371d0cad3b83991b379b9d0dcc97e4c709935750b82fc277b75107d4874272876c9954a30dc893f7be16b901e93ca5a6242afe0610d5b11cfe6
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) <2014> <Ishii Hiroyuki>
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# comment\_extractor
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/comment_extractor.png)](http://badge.fury.io/rb/comment\_extractor)
|
4
|
+
[![Build Status](https://travis-ci.org/alpaca-tc/comment_extractor.png?branch=v1.0.0)](https://travis-ci.org/alpaca-tc/comment\_parser)
|
5
|
+
[![Coverage Status](https://coveralls.io/repos/alpaca-tc/comment_extractor/badge.png?branch=v1.0.0)](https://coveralls.io/r/alpaca-tc/comment\_extractor?branch=v1.0.0)
|
6
|
+
|
7
|
+
## Description
|
8
|
+
|
9
|
+
comment\_extractor extracts comments from source code.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
CommentExtractor has been tested with ruby 2.1.
|
14
|
+
|
15
|
+
```sh
|
16
|
+
git clone https://github.com/alpaca-tc/comment_extractor
|
17
|
+
cd comment_extractor
|
18
|
+
rake install
|
19
|
+
```
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
### Parser
|
24
|
+
|
25
|
+
Given a file path, `Parser.for` looks up a matching Extractor and, when one is found, returns a parser initialized with that extractor. Use the returned parser to get the comments from the file.
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'comment_extractor'
|
29
|
+
|
30
|
+
path = 'path/to/file'
|
31
|
+
if parser = CommentExtractor::Parser.for(path)
|
32
|
+
comments = parser.parse
|
33
|
+
comments.is_a?(CommentExtractor::CodeObjects)
|
34
|
+
|
35
|
+
comment = comments.first
|
36
|
+
comment.file #=> 'path/to/file'
|
37
|
+
comment.line #=> 1
|
38
|
+
comment.value #=> 'I am a comment'
|
39
|
+
end
|
40
|
+
```
|
41
|
+
|
42
|
+
### Extractor
|
43
|
+
|
44
|
+
#### You can use Extractor directly.
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
require 'comment_extractor'
|
48
|
+
|
49
|
+
file_path = 'path/to/file.rb'
|
50
|
+
manager = CommentExtractor::ExtractorManager
|
51
|
+
if extractor = manager.can_extract(file_path)
|
52
|
+
content = File.read(file_path)
|
53
|
+
comments = extractor.new(content).extract_comments
|
54
|
+
comments.is_a?(CommentExtractor::CodeObjects)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Other way to find extractor
|
58
|
+
extractor = manager.find_extractor_by_shebang('#! /usr/local/bin/ruby')
|
59
|
+
extractor = manager.find_extractor_by_filename('path/to/file.rb')
|
60
|
+
extractor = manager.find_extractor_by_filetype('ruby')
|
61
|
+
```
|
62
|
+
|
63
|
+
#### How to use extractor of specific filetype.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
require 'comment_extractor/extractor/d'
|
67
|
+
|
68
|
+
content = File.read('path/to/file.d')
|
69
|
+
comments = CommentExtractor::Extractor::D.new(content).extract_comments
|
70
|
+
```
|
71
|
+
|
72
|
+
### Supported FileTypes
|
73
|
+
|
74
|
+
- **Bash / Zsh**
|
75
|
+
- **C / C++**
|
76
|
+
- **Class**
|
77
|
+
- **C#**
|
78
|
+
- **Clojure**
|
79
|
+
- **Coffee-Script**
|
80
|
+
- **D**
|
81
|
+
- **EmacsLisp**
|
82
|
+
- **Erlang**
|
83
|
+
- **Fortran**
|
84
|
+
- **Go**
|
85
|
+
- **Haml**
|
86
|
+
- **Haskell**
|
87
|
+
- **HTML**
|
88
|
+
- **Java**
|
89
|
+
- **JavaScript**
|
90
|
+
- **Tex**
|
91
|
+
- **Lua**
|
92
|
+
- **PHP**
|
93
|
+
- **Perl**
|
94
|
+
- **Python**
|
95
|
+
- **Ruby**
|
96
|
+
- **SASS**
|
97
|
+
- **SCSS**
|
98
|
+
- **SQF**
|
99
|
+
- **SQL**
|
100
|
+
- **Scala**
|
101
|
+
|
102
|
+
### TODO
|
103
|
+
|
104
|
+
- Markdown
|
105
|
+
- SQS; I cannot implement it yet because I do not know the syntax of SQS.
|
106
|
+
|
107
|
+
### Create a new Extractor
|
108
|
+
|
109
|
+
If you see something missing from the supported file types, please either file an issue or submit a pull request :)
|
110
|
+
I would also be glad if you could send me sample source code for the new file type via an issue.
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
# lib/comment_extractor/extractor/file_type.rb
|
114
|
+
require 'comment_extractor/extractor'
|
115
|
+
|
116
|
+
class CommentExtractor::Extractor::FileType < CommentExtractor::Extractor
|
117
|
+
include CommentExtractor::Extractor::Concerns::SimpleExtractor
|
118
|
+
|
119
|
+
shebang /ruby$/ # (Optional)
|
120
|
+
filename /\.(extension)$/ # (Required)
|
121
|
+
filetype 'filetype' # (Required) file type name, e.g. 'ruby', 'python'
|
122
|
+
|
123
|
+
# define_ignore_patterns(*given regexp)
|
124
|
+
|
125
|
+
# define_bracket('"') #=> define_ignore_patterns(/".*?(?<!\\)"/)
|
126
|
+
# define_regexp_bracket #=> define_ignore_patterns(%r!/(?=[^/])!, /(?<!\\)\//)
|
127
|
+
|
128
|
+
# define the rule of comment
|
129
|
+
comment start_with: /;+/
|
130
|
+
comment start_with: /;--/, end_with: /--\|/, type: BLOCK_COMMENT
|
131
|
+
end
|
132
|
+
```
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
$:.unshift File.expand_path('../../lib', __FILE__)
|
4
|
+
require 'comment_extractor'
|
5
|
+
|
6
|
+
# Debug helper: runs the parser lookup over a single file or over every file
# below a directory, printing which extractor is used and collecting the paths
# for which no parser was found.
class CommentExtractor::Debugger
  # @param file_path [String] path to a file, or to a directory that is
  #   searched recursively for files
  def initialize(file_path)
    @files = if File.file?(file_path)
               [file_path]
             elsif File.directory?(file_path)
               Dir["#{file_path}/**/*"].select { |f| File.file?(f) }
             else
               # Neither a file nor a directory (e.g. a path that does not
               # exist): use an empty list so #parse_all_files does not call
               # #each on nil.
               []
             end
    # Per-key lists of collected debug information.
    @debug = Hash.new { |h, k| h[k] = [] }
  end

  # Parses every collected file, then prints the paths for which
  # CommentExtractor::Parser.for returned no parser.
  def parse_all_files
    @files.each { |file| parse_file(file) }

    puts "Parser is not found"
    puts @debug[:parser_not_found].join("\n")
  end

  # Looks up a parser for +file_path+ and extracts its comments; records the
  # path under :parser_not_found when the lookup returns nothing.
  #
  # @param file_path [String]
  def parse_file(file_path)
    puts "Open: #{file_path}"

    if (parser = CommentExtractor::Parser.for(file_path))
      puts "Use: #{parser.extractor.class}"
      parser.extract_comments
    else
      @debug[:parser_not_found] << file_path
    end
  end
end
|
36
|
+
|
37
|
+
# Disable the default extractor so files without a matching extractor are
# reported instead of being handled by the fallback.
CommentExtractor.configure { |config| config.use_default_extractor = false }

# The first command-line argument is the file or directory to inspect;
# without it there is nothing to do.
target_path = ARGV.first
exit unless target_path

CommentExtractor::Debugger.new(target_path).parse_all_files
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'comment_extractor/code_object'
|
2
|
+
|
3
|
+
module CommentExtractor
  class CodeObject
    # Code object that represents a single extracted comment.
    class Comment < CommentExtractor::CodeObject
      # Line number associated with the comment (nil when none was given).
      attr_accessor :line

      # Symbols identifying the kind of comment.
      module Type
        ONE_LINER_COMMENT = :one_liner_comment
        BLOCK_COMMENT = :block_comment
      end

      # @param line [Object, nil] line number of the comment
      # @param values [Hash] remaining keyword arguments, forwarded unchanged
      #   to CodeObject#initialize
      #
      # The default used to be written as `line: line`, a circular argument
      # reference that Ruby warns about and evaluates to nil; the explicit
      # nil default keeps the same behavior without the warning.
      def initialize(line: nil, **values)
        super(**values)
        @line = line
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
|
3
|
+
module CommentExtractor
  # Array-backed collection that only accepts CodeObject instances. Every
  # object added through #<<, #push or #concat gets this collection stored
  # under metadata[:parent].
  class CodeObjects < DelegateClass(Array)
    # File associated with the collection (nil when not given).
    attr_accessor :file

    # @param file [Object, nil] value stored in #file
    def initialize(file: nil)
      @file = file
      super([])
    end

    # Appends one code object after validating it.
    #
    # @raise [TypeError] when +code_object+ is not a CodeObject
    def <<(code_object)
      super(initialize_code_object(code_object))
    end

    # Appends every given code object after validating each of them.
    #
    # @raise [TypeError] when any argument is not a CodeObject
    def push(*code_object_array)
      arguments = code_object_array.map { |v| initialize_code_object(v) }
      super(*arguments)
    end

    # Appends the elements of the given arrays (flattened) after validating
    # each element.
    #
    # @raise [TypeError] when any element is not a CodeObject
    def concat(*code_object_arrays)
      arguments = code_object_arrays.flatten.map { |v| initialize_code_object(v) }
      super(arguments)
    end

    # Builds a "#<Class:0x… @ivar=value, …>" description of the collection.
    #
    # @return [String]
    def inspect
      # instance_variables already yields names with their leading "@", so
      # the name is interpolated as-is (prefixing another "@" printed "@@file").
      attributes = instance_variables.map { |v| "#{v}=#{instance_variable_get(v)}" }
      attributes = attributes.empty? ? '' : " #{attributes.join(', ')}"
      hex_id = '0x%x' % (object_id << 1)
      "#<#{self.class}:#{hex_id}#{attributes}>"
    end
    alias :to_s :inspect

    private

    # Validates +code_object+ and registers this collection as its parent.
    #
    # @return the validated code object
    # @raise [TypeError] when +code_object+ is not a CodeObject
    def initialize_code_object(code_object)
      unless code_object.is_a?(CodeObject)
        message = "no implicit conversion of #{code_object.class} into #{CodeObject}"
        raise TypeError, message
      end

      code_object.metadata[:parent] = self
      code_object
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'comment_extractor/extractor_manager'
|
2
|
+
require 'comment_extractor/extractor/text'
|
3
|
+
|
4
|
+
module CommentExtractor
  # Holds the library settings (see the add_setting calls at the bottom of
  # the class for the available settings).
  class Configuration
    # Names of settings that must be truthy after initialization.
    @@required_attributes = {}

    # @param attributes [Hash] setting name => value; keys without a
    #   matching writer are ignored
    # @raise [RuntimeError] when a required setting is missing
    def initialize(attributes = {})
      attributes.each do |key, value|
        method_name = "#{key}="
        send(method_name, value) if respond_to?(method_name)
      end

      @@required_attributes.each_key do |key|
        raise "Unable to initialize #{key} without attribute" unless send(key)
      end

      # Fill in defaults only for settings the caller did not supply.
      # Assigning them unconditionally here used to discard every value
      # passed in +attributes+.
      self.extractors = ExtractorManager.default_extractors if extractors.nil?
      self.default_extractor = Extractor::Text if default_extractor.nil?
      self.use_default_extractor = true if use_default_extractor.nil?
    end

    # Declares a setting with a reader and a writer.
    #
    # @param name [Symbol] setting name
    # @param opts [Hash] :predicate => also define "name?";
    #   :required => initialization fails unless the setting is truthy
    def self.add_setting(name, opts={})
      attr_accessor name

      define_predicate_for(name) if opts.delete(:predicate)
      define_required_attribute(name) if opts.delete(:required)
    end

    # NOTE: a bare `private` used to precede the two helpers below, but it
    # has no effect on `def self.` methods; they remain public as before.

    # Marks the given settings as required.
    def self.define_required_attribute(*names)
      names.each do |name|
        @@required_attributes[name] = nil
      end
    end

    # Defines "name?" for each given setting, returning its value as a
    # strict boolean.
    def self.define_predicate_for(*names)
      names.each do |name|
        define_method "#{name}?" do
          !!send(name)
        end
      end
    end

    add_setting :extractors
    add_setting :default_extractor
    add_setting :use_default_extractor
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module CommentExtractor
  # Helpers that read file content and convert it to a target encoding.
  module Encoding
    # Byte sequence of the UTF-8 byte order mark, kept as a binary string so
    # it can be compared bytewise with content in any encoding.
    UTF8_BOM = "\xEF\xBB\xBF".b.freeze

    # Reads +file_path+ in binary mode and converts the content.
    #
    # @param file_path [String]
    # @param encoding [::Encoding, String] target encoding
    # @return [String] the converted content
    def self.read_file(file_path, encoding = ::Encoding.default_external)
      content = File.open(file_path, 'rb') { |f| f.read }
      # Forward the requested encoding; it used to be dropped, so the
      # default external encoding was always used.
      encode(content, encoding)
    end

    # Converts +content+ to +encoding+, modifying the string in place.
    #
    # A leading UTF-8 BOM is removed and the content is then treated as
    # UTF-8. Otherwise the bytes are first reinterpreted as +encoding+ and,
    # if that is invalid, transcoded from the original encoding.
    #
    # @param content [String] modified in place
    # @param encoding [::Encoding, String] target encoding
    # @return [String] +content+
    # @raise [RuntimeError] when the result is still not valid in +encoding+
    def self.encode(content, encoding = ::Encoding.default_external)
      windows_platforms = Regexp.new(%w[mingw mswin].join('|'))
      content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ windows_platforms

      original_encoding = content.encoding

      if strip_bom(content) # When the content contains bom, it is UTF-8
        content.force_encoding(::Encoding::UTF_8)
        content.encode!(encoding)
      else
        content.force_encoding(encoding)
      end

      unless content.valid_encoding?
        content.force_encoding(original_encoding)
        content.encode!(encoding)
      end

      unless content.valid_encoding?
        # The message used to interpolate `file_path`, which is not defined
        # in this method and therefore raised NameError instead.
        raise "Unable to convert content to #{encoding}"
      end

      content
    end

    # NOTE: a bare `private` used to precede this method, but it has no
    # effect on `def self.` methods; strip_bom remains public as before.

    # Removes a leading UTF-8 BOM from +content+ in place.
    #
    # The check is done on raw bytes: matching the BOM with a regexp raises
    # for binary strings (as produced by read_file) that contain it.
    #
    # @return [String, nil] +content+ when a BOM was removed, nil otherwise
    def self.strip_bom(content)
      return nil unless content.byteslice(0, UTF8_BOM.bytesize).b == UTF8_BOM

      original_encoding = content.encoding
      content.force_encoding(::Encoding::BINARY)
      content.slice!(0, UTF8_BOM.bytesize)
      content.force_encoding(original_encoding)
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Extractor rules for Clojure sources (*.clj).
class CommentExtractor::Extractor::Clojure < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.clj$/)
  filetype('clojure')

  # Text inside single- or double-quoted strings is skipped.
  define_default_bracket

  # One or more semicolons start a one-line comment.
  comment(start_with: /;+/)
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Extractor rules for CoffeeScript sources (*.coffee).
class CommentExtractor::Extractor::Coffee < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.coffee$/)
  filetype('coffee')

  # Text inside quoted strings and /regexp/ literals is skipped.
  define_default_bracket
  define_regexp_bracket

  # The '###' block rule is declared before the '#' one-line rule; rules are
  # kept in declaration order.
  comment(start_with: '###', end_with: '###', type: BLOCK_COMMENT)
  comment(start_with: '#')
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'comment_extractor/smart_string_scanner'
|
2
|
+
|
3
|
+
using CommentExtractor::SmartStringScanner
|
4
|
+
|
5
|
+
class CommentExtractor::Extractor
  module Concerns
    # Mixin that turns declarative rules (comment markers, brackets, ignore
    # patterns and custom conditions) into a #scan loop over `scanner`.
    #
    # Including the module extends the includer with ClassMethods, the DSL
    # used to declare the rules. Rules are stored in instance variables of
    # the includer and read through the instance-level readers defined by
    # attr_definition.
    module SimpleExtractor
      include CommentExtractor::CodeObject::Comment::Type

      # Gives the including class or module the rule-declaration DSL.
      def self.included(k)
        k.extend(ClassMethods)
      end

      # Defines one instance-level reader per key. Each reader returns the
      # same-named instance variable of the instance's class, or an empty
      # array when nothing was declared.
      def self.attr_definition(*keys)
        keys.each do |key|
          define_method key do
            self.class.instance_variable_get("@#{key}") || []
          end
        end
      end
      attr_definition :brackets, :comment_regexp,
                      :ignore_patterns, :complicate_conditions

      # DSL available on every class or module that includes SimpleExtractor.
      module ClassMethods
        include CommentExtractor::CodeObject::Comment::Type

        # Hook that runs when a module carrying rules is itself included
        # somewhere: the rules are copied to the new includer.
        def included(k)
          instance_variables.each do |key|
            value = instance_variable_get(key)
            # Copy the rule arrays instead of sharing them; otherwise a
            # later `<<` in the includer (see #comment, #append_bracket)
            # would also modify the rules of this module and of every other
            # includer.
            value = value.dup if value.is_a?(Array)
            k.instance_variable_set(key, value)
          end
        end

        # Declares a comment rule.
        #
        # @param start_with [String, Regexp] marker that opens the comment
        # @param end_with [String, Regexp, nil] marker that closes a block
        #   comment (compiled as a multiline regexp for BLOCK_COMMENT)
        # @param type [Symbol] ONE_LINER_COMMENT or BLOCK_COMMENT
        # @raise [ArgumentError] when +type+ or +start_with+ is missing
        def comment(start_with: nil, end_with: nil, type: ONE_LINER_COMMENT)
          @comment_regexp ||= []
          raise ArgumentError unless [type, start_with].all?

          definition = { start_with: build_regexp(start_with), type: type, end_with: end_with }

          if type == BLOCK_COMMENT
            definition[:end_with] = build_regexp(end_with, Regexp::MULTILINE)
          end
          @comment_regexp << definition
        end

        # Declares patterns whose matches are skipped by the scan loop.
        def define_ignore_patterns(*patterns)
          @ignore_patterns ||= []
          @ignore_patterns += patterns
        end

        # Declares a bracket opened and closed by the same marker; the
        # closing marker must not be preceded by a backslash.
        #
        # @param bracket [String, Regexp] the marker
        # @param options [Integer] Regexp options for the closing pattern
        def define_bracket(bracket, options = 0)
          start_regexp = build_regexp(bracket)
          stop_regexp = if bracket.is_a?(Regexp)
                          join_regexp(/(?<!\\)/, bracket)
                        else
                          /(?<!\\)#{bracket}/
                        end
          stop_regexp = Regexp.new(stop_regexp.source, options)
          append_bracket(start_regexp, stop_regexp)
        end

        # Declares the /.../ bracket: a slash not followed by another slash
        # up to the next slash not preceded by a backslash.
        def define_regexp_bracket
          append_bracket(%r!/(?=[^/])!, /(?<!\\)\//)
        end

        # Declares the double- and single-quote brackets; both may span
        # several lines.
        def define_default_bracket
          define_bracket('"', Regexp::MULTILINE)
          define_bracket("'", Regexp::MULTILINE)
        end

        # Registers a bracket from explicit start and end patterns.
        def append_bracket(start_with, end_with)
          @brackets ||= []
          @brackets << { start_with: start_with, end_with: end_with }
        end

        # Registers a block that is instance_eval'ed by the scan loop. The
        # block must return a truthy value only when it consumed input from
        # the scanner; a truthy result restarts the scan loop.
        def define_complicate_condition(&proc_object)
          @complicate_conditions ||= []
          @complicate_conditions << proc_object
        end

        private

        # Concatenates the sources of the given regexps into one regexp.
        def join_regexp(*regexp)
          # [review] - Should I ignore regexp options?
          Regexp.new(regexp.map(&:source).join)
        end

        # Builds a regexp from a string (used as regexp source) or from the
        # source of another regexp, with the given options.
        def build_regexp(str_or_reg, type = 0)
          str_or_reg = str_or_reg.source if str_or_reg.respond_to?(:source)
          Regexp.new(str_or_reg, type)
        end
      end

      # Consumes the scanner until the end of the input, trying the rules in
      # a fixed order at every position: ignore patterns, custom conditions,
      # comments, brackets, a line break, then any single character.
      def scan
        until scanner.eos?
          next if scan_ignore_patterns
          next if scan_complicate_conditions
          next if scan_comment
          next if scan_bracket
          next if scanner.scan(CommentExtractor::Extractor::REGEXP[:BREAK])
          next if scanner.scan(/./)

          raise_report
        end
      end

      private

      # Evaluates the custom conditions in declaration order.
      #
      # @return [true, nil] true as soon as one condition reports a match
      def scan_complicate_conditions
        complicate_conditions.each do |proc_object|
          # Report the match, consistent with #scan_ignore_patterns. This
          # used to be a bare `return`, so a match was treated as a miss
          # and the scan loop went on to try the remaining rules.
          return true if instance_eval(&proc_object)
        end

        nil
      end

      # Skips a bracketed section: after the first opening pattern that
      # matches, everything up to and including the closing pattern.
      #
      # @return [String, nil] the skipped text; nil when no bracket opens
      #   here or the closing pattern is not found
      def scan_bracket
        brackets.each do |definition|
          start_with = definition[:start_with]
          end_with = definition[:end_with]
          next unless scanner.scan(start_with)

          new_regexp = Regexp.new(/.*?/.source + end_with.source, end_with.options)
          return scanner.scan(new_regexp)
        end

        nil
      end

      # @return [true, nil] true when one of the ignore patterns matched
      def scan_ignore_patterns
        ignore_patterns.each do |pattern|
          return true if scanner.scan(pattern)
        end

        nil
      end

      # Tries the comment rules in declaration order and collects the
      # comment for the first rule whose start marker matches.
      #
      # @return the result of collecting the comment; nil when no rule matches
      def scan_comment
        comment_regexp.each do |definition|
          next unless scanner.scan(definition[:start_with])

          result = case definition[:type]
                   when ONE_LINER_COMMENT
                     identify_single_line_comment
                   when BLOCK_COMMENT
                     identify_multi_line_comment(definition[:end_with])
                   else
                     raise_report
                   end

          return result
        end

        nil
      end

      # Collects the rest of the current line as a one-line comment.
      def identify_single_line_comment
        line_number = scanner.current_line
        comment = scanner.scan(/^.*$/)
        metadata = { type: ONE_LINER_COMMENT }
        comment_object = build_comment(line_number, comment, **metadata)

        code_objects << comment_object
      end

      # Collects a block comment, one comment object per line, up to the
      # closing marker matched by +regexp+.
      #
      # @param regexp [Regexp] closing marker
      # @return [Array<String>, nil] the comment lines; nil when the closing
      #   marker is not found
      def identify_multi_line_comment(regexp)
        line_no = scanner.current_line
        stop_regexp = Regexp.new(/.*?/.source + regexp.source, regexp.options)
        comment_block = scanner.scan(stop_regexp)
        # Unterminated block comment: report no match instead of calling
        # #sub on nil.
        return nil unless comment_block

        # Drop the closing marker from the end of the last line.
        remove_tail_regexp = Regexp.new(regexp.source + /$/.source)
        comments = comment_block.sub(remove_tail_regexp, '').split("\n")
        comments.each_with_index do |comment, index|
          metadata = { type: BLOCK_COMMENT }
          # Pass the metadata the same way as #identify_single_line_comment.
          code_objects << build_comment(line_no + index, comment, **metadata)
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
require 'comment_extractor/extractor/concerns/simple_extractor'
|
3
|
+
|
4
|
+
module CommentExtractor
  class Extractor
    module Concerns
      # Shared rule set for languages that use `//` one-line comments and
      # `/* ... */` block comments.
      module SlashExtractor
        include CommentExtractor::Extractor::Concerns::SimpleExtractor

        # Text inside single- or double-quoted strings is skipped.
        define_default_bracket

        comment(start_with: /\/\//)
        comment(start_with: /\/\*/, end_with: /\*\//, type: BLOCK_COMMENT)
      end
    end
  end
end
|