comment_extractor 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +132 -0
- data/bin/comment_parser_debug +45 -0
- data/lib/comment_extractor/code_object/comment.rb +19 -0
- data/lib/comment_extractor/code_object.rb +12 -0
- data/lib/comment_extractor/code_objects.rb +46 -0
- data/lib/comment_extractor/configuration.rb +50 -0
- data/lib/comment_extractor/encoding.rb +40 -0
- data/lib/comment_extractor/extractor/c.rb +8 -0
- data/lib/comment_extractor/extractor/cc.rb +8 -0
- data/lib/comment_extractor/extractor/class.rb +8 -0
- data/lib/comment_extractor/extractor/clojure.rb +11 -0
- data/lib/comment_extractor/extractor/coffee.rb +13 -0
- data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
- data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
- data/lib/comment_extractor/extractor/cpp.rb +8 -0
- data/lib/comment_extractor/extractor/cs.rb +8 -0
- data/lib/comment_extractor/extractor/css.rb +8 -0
- data/lib/comment_extractor/extractor/cxx.rb +8 -0
- data/lib/comment_extractor/extractor/d.rb +9 -0
- data/lib/comment_extractor/extractor/erlang.rb +12 -0
- data/lib/comment_extractor/extractor/fortran.rb +11 -0
- data/lib/comment_extractor/extractor/go.rb +8 -0
- data/lib/comment_extractor/extractor/h.rb +8 -0
- data/lib/comment_extractor/extractor/haml.rb +49 -0
- data/lib/comment_extractor/extractor/haskell.rb +12 -0
- data/lib/comment_extractor/extractor/hpp.rb +8 -0
- data/lib/comment_extractor/extractor/html.rb +13 -0
- data/lib/comment_extractor/extractor/java.rb +8 -0
- data/lib/comment_extractor/extractor/java_script.rb +12 -0
- data/lib/comment_extractor/extractor/lisp.rb +11 -0
- data/lib/comment_extractor/extractor/lua.rb +12 -0
- data/lib/comment_extractor/extractor/m.rb +9 -0
- data/lib/comment_extractor/extractor/markdown.rb +7 -0
- data/lib/comment_extractor/extractor/mm.rb +8 -0
- data/lib/comment_extractor/extractor/perl.rb +12 -0
- data/lib/comment_extractor/extractor/php.rb +8 -0
- data/lib/comment_extractor/extractor/python.rb +13 -0
- data/lib/comment_extractor/extractor/ruby.rb +40 -0
- data/lib/comment_extractor/extractor/sass.rb +8 -0
- data/lib/comment_extractor/extractor/scala.rb +8 -0
- data/lib/comment_extractor/extractor/scss.rb +8 -0
- data/lib/comment_extractor/extractor/shell.rb +11 -0
- data/lib/comment_extractor/extractor/sqf.rb +8 -0
- data/lib/comment_extractor/extractor/sql.rb +12 -0
- data/lib/comment_extractor/extractor/sqs.rb +7 -0
- data/lib/comment_extractor/extractor/tex.rb +12 -0
- data/lib/comment_extractor/extractor/text.rb +10 -0
- data/lib/comment_extractor/extractor/yaml.rb +12 -0
- data/lib/comment_extractor/extractor.rb +96 -0
- data/lib/comment_extractor/extractor_manager.rb +158 -0
- data/lib/comment_extractor/file.rb +42 -0
- data/lib/comment_extractor/parser.rb +33 -0
- data/lib/comment_extractor/smart_string_scanner.rb +11 -0
- data/lib/comment_extractor/version.rb +4 -0
- data/lib/comment_extractor.rb +18 -0
- data/spec/assets/binary_file +0 -0
- data/spec/assets/shebang_file +3 -0
- data/spec/assets/source_code/c.c +158 -0
- data/spec/assets/source_code/cc.cc +24 -0
- data/spec/assets/source_code/class +0 -0
- data/spec/assets/source_code/clojure.clj +41 -0
- data/spec/assets/source_code/coffee.coffee +27 -0
- data/spec/assets/source_code/cpp.cpp +130 -0
- data/spec/assets/source_code/cs.cs +53 -0
- data/spec/assets/source_code/css.css +37 -0
- data/spec/assets/source_code/cxx +0 -0
- data/spec/assets/source_code/d.d +110 -0
- data/spec/assets/source_code/erlang.es +34 -0
- data/spec/assets/source_code/fortran.f +41 -0
- data/spec/assets/source_code/golang.go +61 -0
- data/spec/assets/source_code/h +0 -0
- data/spec/assets/source_code/haml.haml +26 -0
- data/spec/assets/source_code/haskell.hs +36 -0
- data/spec/assets/source_code/hpp +0 -0
- data/spec/assets/source_code/html.html +139 -0
- data/spec/assets/source_code/java.java +39 -0
- data/spec/assets/source_code/java_script.js +164 -0
- data/spec/assets/source_code/lisp.el +18 -0
- data/spec/assets/source_code/lua.lua +34 -0
- data/spec/assets/source_code/m +0 -0
- data/spec/assets/source_code/mm +0 -0
- data/spec/assets/source_code/perl.pl +36 -0
- data/spec/assets/source_code/php.php +31 -0
- data/spec/assets/source_code/python.py +139 -0
- data/spec/assets/source_code/ruby.rb +36 -0
- data/spec/assets/source_code/sass.sass +77 -0
- data/spec/assets/source_code/scala.scala +46 -0
- data/spec/assets/source_code/scss.scss +93 -0
- data/spec/assets/source_code/shell.sh +5 -0
- data/spec/assets/source_code/sqf +0 -0
- data/spec/assets/source_code/sql.sql +11 -0
- data/spec/assets/source_code/sqs +0 -0
- data/spec/assets/source_code/tex.tex +20 -0
- data/spec/assets/source_code/text.txt +15 -0
- data/spec/assets/source_code/vim +17 -0
- data/spec/assets/source_code/yaml.yml +44 -0
- data/spec/assets/stripper/children/children +0 -0
- data/spec/assets/stripper/children/children.c +0 -0
- data/spec/assets/stripper/children/children.js +0 -0
- data/spec/assets/stripper/children/children.o +0 -0
- data/spec/assets/stripper/children/children.rb +1 -0
- data/spec/assets/stripper/test +0 -0
- data/spec/assets/stripper/test.c +0 -0
- data/spec/assets/stripper/test.js +0 -0
- data/spec/assets/stripper/test.o +0 -0
- data/spec/assets/stripper/test.rb +1 -0
- data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
- data/spec/comment_extractor/code_object_spec.rb +18 -0
- data/spec/comment_extractor/code_objects_spec.rb +66 -0
- data/spec/comment_extractor/configuration_spec.rb +68 -0
- data/spec/comment_extractor/encoding_spec.rb +77 -0
- data/spec/comment_extractor/extractor/c_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
- data/spec/comment_extractor/extractor/class_spec.rb +9 -0
- data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
- data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/css_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
- data/spec/comment_extractor/extractor/d_spec.rb +10 -0
- data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
- data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
- data/spec/comment_extractor/extractor/go_spec.rb +9 -0
- data/spec/comment_extractor/extractor/h_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/html_spec.rb +9 -0
- data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
- data/spec/comment_extractor/extractor/java_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
- data/spec/comment_extractor/extractor/m_spec.rb +9 -0
- data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
- data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
- data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
- data/spec/comment_extractor/extractor/php_spec.rb +9 -0
- data/spec/comment_extractor/extractor/python_spec.rb +9 -0
- data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
- data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
- data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
- data/spec/comment_extractor/extractor/text_spec.rb +7 -0
- data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
- data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
- data/spec/comment_extractor/extractor_spec.rb +102 -0
- data/spec/comment_extractor/file_spec.rb +100 -0
- data/spec/comment_extractor/parser_spec.rb +67 -0
- data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
- data/spec/comment_extractor/version_spec.rb +8 -0
- data/spec/comment_extractor_spec.rb +15 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
- data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
- data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
- data/spec/support/rspec/comment_extractor.rb +6 -0
- metadata +370 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Erlang sources.
#
# Selected for files ending in `.erl` or `.es`, for the `erlang` filetype,
# and for scripts whose shebang line matches `escript`.
class CommentExtractor::Extractor::Erlang < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.(?:erl|es)$/)
  filetype 'erlang'
  shebang(/escript/)

  # Syntax rules (DSL provided by SimpleExtractor, not visible in this file;
  # presumably `define_default_bracket` registers the usual string
  # delimiters so comment markers inside strings are skipped -- confirm).
  define_default_bracket
  # A comment starts with one or more `%` characters.
  comment start_with: /%+/
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Fortran sources.
#
# Selected for files ending in `.f`, `.f90`, `.F` or `.F90` and for the
# `fortran` filetype.
class CommentExtractor::Extractor::Fortran < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.(?:f|f90|F|F90)$/)
  filetype 'fortran'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # A comment starts with `!`.
  comment start_with: '!'
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Copyright (c) 2006-2009 Hampton Catlin and Nathan Weizenbaum and @alpaca-tc
|
|
2
|
+
|
|
3
|
+
require 'haml'
|
|
4
|
+
require 'comment_extractor/extractor'
|
|
5
|
+
|
|
6
|
+
# Comment extractor for Haml templates.
#
# Instead of scanning the text itself, it parses the template with
# Haml's own parser and walks the resulting node tree, collecting
# comments from `:haml_comment`, `:tag` and `:script` nodes.
class CommentExtractor::Extractor::Haml < CommentExtractor::Extractor
  filename(/\.haml$/)
  filetype 'haml'

  # Parses the template content and visits every top-level node.
  #
  # NOTE(review): the original author flagged this method as incomplete.
  def scan
    tree = ::Haml::Parser.new(content, ::Haml::Options.new).parse
    tree.children.each { |node| detect_comment_from_node(node) }
  end

  private

  # Records the comments carried by +node+ (if its type is one we
  # understand) and then recurses into its children.
  def detect_comment_from_node(node)
    type = node.type

    if type == :haml_comment
      identify_single_line_comment_from_comment_tag(node)
    elsif type == :tag || type == :script
      identify_single_line_comment_from_tag(node)
    end

    node.children.each { |child| detect_comment_from_node(child) }
  end

  # Looks for a trailing `# ...` in the node's `:text` and `:value`
  # entries (in that order) and records each non-empty match as a comment
  # on the node's line.
  def identify_single_line_comment_from_tag(node)
    # [todo] - refactoring
    %i[text value].each do |key|
      # Regexp#match returns nil for a nil subject, like `=~` does.
      matched = /#(?<comment>[^#]*)$/.match(node.value[key])
      next unless matched

      comment = matched[:comment]
      code_objects << build_comment(node.line, comment) unless comment.empty?
    end
  end

  # Records the text of a Haml comment node unless it is empty.
  def identify_single_line_comment_from_comment_tag(node)
    text = node.value[:text]
    code_objects << build_comment(node.line, text) unless text.empty?
  end
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Haskell sources.
#
# Selected for files ending in `.hs` and for the `haskell` filetype.
class CommentExtractor::Extractor::Haskell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.hs$/)
  filetype 'haskell'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Line comment: `-- ...`
  comment start_with: '--'
  # Block comment: `{- ... -}`
  comment start_with: '{-', end_with: '-}', type: BLOCK_COMMENT
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for HTML documents.
#
# Selected for files ending in `.html` and for the `html` filetype.
class CommentExtractor::Extractor::Html < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.html$/)
  filetype 'html'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Whole `<script ...> ... </script>` elements are registered as an
  # ignore pattern (case-insensitive, spanning lines).
  define_ignore_patterns(/<\s*script[^>]*>.*?<\/script\s*>/mi)

  # Block comment: `<!-- ... -->`
  comment start_with: '<!--', end_with: '-->', type: BLOCK_COMMENT
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for JavaScript sources.
#
# Selected for files ending in `.js`, for the `javascript` filetype, and
# for scripts whose shebang line ends in `js` or `node`.
class CommentExtractor::Extractor::JavaScript < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor
  # Comment rules come from the SlashExtractor concern (defined elsewhere).
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  # Schema used by the extractor lookup.
  filename(/\.js$/)
  filetype 'javascript'
  shebang(/.*(?:js|node)$/)

  # Presumably also treats regexp literals as brackets so that `//` inside
  # them is not mistaken for a comment -- confirm in SimpleExtractor.
  define_regexp_bracket
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Lisp sources.
#
# Selected for files ending in `.el` and for the `lisp` filetype.
class CommentExtractor::Extractor::Lisp < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.el$/)
  filetype 'lisp'

  # Syntax rules (DSL provided by SimpleExtractor).
  # Only the double quote is registered as a bracket here.
  define_bracket '"'
  # A comment starts with one or more `;` characters.
  comment start_with: /;+/
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Lua sources.
#
# Selected for files ending in `.lua` and for the `lua` filetype.
class CommentExtractor::Extractor::Lua < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.lua$/)
  filetype 'lua'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Block comment: `--[[ ... ]]`. It is declared before the line-comment
  # rule; the order is kept as in the original because `--` is a prefix of
  # `--[[` (presumably rules are tried in declaration order -- confirm).
  comment start_with: '--\[\[', end_with: /\s*\]\]/, type: BLOCK_COMMENT
  # Line comment: `-- ...`
  comment start_with: '--'
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Perl sources.
#
# Selected for files ending in `.pm` or `.pl` and for the `perl` filetype.
class CommentExtractor::Extractor::Perl < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.(?:pm|pl)$/)
  filetype 'perl'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # POD block: from a line starting with `=pod` to a line starting
  # with `=cut`.
  comment start_with: /^=pod/, end_with: /^=cut/, type: BLOCK_COMMENT
  # Line comment: `# ...`
  comment start_with: '#'
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for Python sources.
#
# Selected for files ending in `.py` and for the `py` filetype.
class CommentExtractor::Extractor::Python < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.py$/)
  filetype 'py'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Triple-quoted strings are treated as block comments. Both quote styles
  # are declared; previously the double-quote rule was registered twice
  # and the single-quote form was missing.
  comment start_with: '"""', end_with: '"""', type: BLOCK_COMMENT
  comment start_with: "'''", end_with: "'''", type: BLOCK_COMMENT
  # Line comment: `# ...`
  comment start_with: '#'
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require 'rdoc'
|
|
2
|
+
require 'comment_extractor/extractor'
|
|
3
|
+
|
|
4
|
+
# Comment extractor for Ruby sources.
#
# Rather than pattern-matching the text, it tokenizes the content with
# RDoc's Ruby lexer and keeps only the comment tokens.
class CommentExtractor::Extractor::Ruby < CommentExtractor::Extractor
  include CommentExtractor::CodeObject::Comment::Type

  # Schema used by the extractor lookup.
  filename(/(?:Rakefile|Gemfile|\.rb|\.gemspec|Guardfile|config.ru)$/)
  filetype 'ruby'
  shebang(/.*ruby$/)

  # Minimal options object handed to the RDoc lexer; it only answers
  # #tab_width.
  class Options < Hash
    def tab_width
      2
    end
  end

  # Tokenizes the content and records every comment token.
  def scan
    RDoc::RubyLex.tokenize(content, Options.new).each do |token|
      case token
      when RDoc::RubyToken::TkRD_COMMENT # =begin ... =end
        add_rd_comment(token)
      when RDoc::RubyToken::TkCOMMENT # # ...
        add_line_comment(token)
      end
    end
  end

  private

  # Records each line of an `=begin ... =end` block as a block comment.
  # Line numbers start one past the token's own line.
  def add_rd_comment(token)
    first_line = token.line_no + 1
    token.value.split("\n").each_with_index do |text, offset|
      add_comment(first_line + offset, text, type: BLOCK_COMMENT)
    end
  end

  # Records a `# ...` comment with the leading marker (and one optional
  # space after it) stripped.
  def add_line_comment(token)
    text = token.value.sub(/^\s*#\s?/, '')
    add_comment(token.line_no, text, type: ONE_LINER_COMMENT)
  end

  # Builds a comment object and appends it to the collected code objects.
  def add_comment(line, comment, **metadata)
    code_objects << build_comment(line, comment, **metadata)
  end
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for shell scripts.
#
# Selected for files ending in `.zsh`, `.bash`, `.sh`, `zshrc` or
# `bashrc`, and for the `bash`, `sh` and `zsh` filetypes.
class CommentExtractor::Extractor::Shell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/(?:\.(?:zsh|bash|sh)|zshrc|bashrc)$/)
  filetype %w[bash sh zsh]

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Line comment: `# ...`
  comment start_with: '#'
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for SQL sources.
#
# Selected for files ending in `.sql` and for the `sql` filetype.
class CommentExtractor::Extractor::Sql < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.sql$/)
  filetype 'sql'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_default_bracket
  # Line comment: `-- ...`
  comment start_with: '--'
  # Block comment: `/* ... */` (the markers are written pre-escaped).
  comment start_with: '/\*', end_with: '\*/', type: BLOCK_COMMENT
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for TeX sources.
#
# Selected for files ending in `.tex` and for the `tex` filetype.
class CommentExtractor::Extractor::Tex < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.tex$/)
  filetype 'tex'

  # Syntax rules (DSL provided by SimpleExtractor).
  define_bracket '{'
  # A comment starts with a `%` that is not preceded by a backslash.
  comment start_with: /(?<!\\)%/
  # An escaped percent sign (`\%`) is registered as an ignore pattern.
  define_ignore_patterns(/\\%/)
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'comment_extractor/extractor'
|
|
2
|
+
|
|
3
|
+
# Comment extractor definition for YAML documents.
#
# Selected for files ending in `.yml` and for the `yaml` filetype.
class CommentExtractor::Extractor::Yaml < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Schema used by the extractor lookup.
  filename(/\.yml$/)
  filetype 'yaml'

  # Lines that contain no `#` at all are registered as an ignore pattern.
  define_ignore_patterns(/^\s*[^#]+$/)
  # A comment line: optional leading whitespace followed by `#`.
  # (Was `/^s*#/`, which matched a literal letter "s" instead of
  # whitespace and so missed indented comment lines.)
  comment start_with: /^\s*#/
  # A `#` preceded by whitespace.
  # NOTE(review): the lookahead `(?=[^#]*)` always succeeds, so as written
  # this only matches when the `#` is the last character on the line; the
  # `$` may have been meant to sit inside the lookahead -- confirm intent.
  comment start_with: /\s#(?=[^#]*)$/
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require 'strscan'
|
|
2
|
+
require 'comment_extractor/code_object'
|
|
3
|
+
require 'comment_extractor/code_objects'
|
|
4
|
+
require 'comment_extractor/extractor/concerns/simple_extractor'
|
|
5
|
+
require 'comment_extractor/extractor/concerns/slash_extractor'
|
|
6
|
+
require 'comment_extractor/version'
|
|
7
|
+
|
|
8
|
+
module CommentExtractor
  # Base class for all language-specific comment extractors.
  #
  # Subclasses describe which files they handle through the class-level
  # schema accessors (+shebang+, +filetype+, +filename+) and implement
  # #scan, which is expected to push comment objects onto #code_objects.
  class Extractor
    # Base class for errors raised by extractors.
    class Error < RuntimeError; end

    # Raised by #raise_report. It derives from Error (and therefore still
    # from RuntimeError) so that `rescue Extractor::Error` catches it.
    class SyntaxDefinitionError < Error; end

    REGEXP = {
      BREAK: /(?:\r?\n|\r)/,
    }.freeze

    # Names of the class-level schema accessors.
    SCHEMA_ACCESSOR_NAMES = %i[shebang filetype filename].freeze
    # Original (misspelled) constant name, kept because other files
    # reference it.
    SCHAME_ACCESSOR_NAMES = SCHEMA_ACCESSOR_NAMES

    attr_reader :content, :code_objects

    # @return [Boolean] whether .disable! has been called on this class
    def self.disabled?
      @status == :disable
    end

    # Marks this extractor class as disabled.
    def self.disable!
      @status = :disable
    end

    # Defines one combined reader/writer class method per key: called with
    # a (truthy) value it stores the value in .schema, called without one
    # it returns the stored value.
    #
    # @param keys [Array<Symbol>] accessor names to define
    def self.schema_accessor(*keys)
      keys.each do |key|
        define_singleton_method key do |value = nil|
          if value
            schema[key] = value
          else
            schema[key]
          end
        end
      end
    end
    schema_accessor(*SCHEMA_ACCESSOR_NAMES)

    # Per-class storage for the schema values. It lives in a class-level
    # instance variable, so each subclass has its own hash.
    #
    # @return [Hash]
    def self.schema
      @schema ||= {}
    end

    # @param content [String] source text to scan
    # @param code_objects [#<<, nil] collection receiving the extracted
    #   comments; a new CodeObjects is created when omitted
    def initialize(content, code_objects = nil)
      @content = content
      @code_objects = code_objects || CodeObjects.new
    end

    # Runs #scan once and returns the collected code objects; later calls
    # return the memoized result. The result should be a CodeObjects
    # containing instances of CodeObject::Comment.
    def extract_comments
      @extracted_comments ||= begin
        scan
        code_objects
      end
    end

    protected

    # Template method; subclasses must override it.
    #
    # @raise [NotImplementedError] always, in the base class
    def scan
      raise NotImplementedError, "You must implement #{self.class}##{__method__}"
    end

    # Lazily built, memoized scanner over the content.
    def scanner
      @scanner ||= build_scanner
    end

    # @return [StringScanner] a new scanner over the content
    def build_scanner
      StringScanner.new(@content)
    end

    # Builds a comment object for the given line, recording this extractor
    # under the +:extractor+ metadata key.
    def build_comment(line, comment, **metadata)
      metadata[:extractor] = self
      CodeObject::Comment.new(line: line, value: comment, **metadata)
    end

    private

    # Raises a SyntaxDefinitionError whose message asks the user to file a
    # bug report and includes the gem version, the current time, the
    # extractor class and the full content being scanned.
    #
    # @raise [SyntaxDefinitionError] always
    def raise_report
      content = "Content:\n#{@content}"

      # The gsub strips leading whitespace from every line of the heredoc
      # (which also swallows the blank lines).
      raise SyntaxDefinitionError, <<-MSG.gsub(/^\s*/, '') + content
        Error occurred.
        Please report to <https://github.com/alpaca-tc/comment_extractor/issues>

        - - -

        CommentExtractor #{CommentExtractor::VERSION}

        Date: #{Time.now}
        Extractor: #{self.class}
      MSG
    end
  end
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
require 'comment_extractor'
|
|
2
|
+
require 'comment_extractor/extractor'
|
|
3
|
+
require 'comment_extractor/file'
|
|
4
|
+
|
|
5
|
+
using CommentExtractor::DetectableSchemeFile
|
|
6
|
+
|
|
7
|
+
module CommentExtractor
  # Registry of extractor classes plus the lookup logic that picks an
  # extractor for a file by shebang, filename or filetype.
  module ExtractorManager
    class << self
      # @return [Array<Symbol>] constant names (under Extractor) of the
      #   extractors registered by default
      def default_extractors
        %i[
          C Cc Class Clojure Coffee Cpp
          Cs Css Cxx D Erlang Fortran Go H Haml
          Haskell Hpp Html Java JavaScript Lisp
          Lua M Markdown Mm Perl Php Python
          Ruby Sass Scala Scss Shell Sqf
          Sql Sqs Tex Yaml
        ]
      end

      # Registers an extractor and invalidates the cached lookup tables.
      #
      # @param klass_or_symbol [Class, Symbol] either an Extractor subclass,
      #   which is stored directly, or the name of a constant under
      #   Extractor, which is autoloaded from
      #   `comment_extractor/extractor/<snake_case_name>` on first use
      # @return [nil]
      def regist_extractor(klass_or_symbol)
        @extractor_definitions = nil

        # Registered values are used as classes later on (`disabled?`,
        # `filetype`, ...), so the check must be "is an Extractor subclass".
        # The former `is_a?(Extractor)` was never true for a class.
        extractor = extractor_class?(klass_or_symbol) ? klass_or_symbol : nil
        extractors[:"#{klass_or_symbol}"] = extractor
        return if extractor

        # CamelCase -> snake_case file name, e.g. :JavaScript -> "java_script".
        filename = "#{klass_or_symbol}".gsub(/\W/, '').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          downcase
        file_path = "comment_extractor/extractor/#{filename}"
        Extractor.autoload klass_or_symbol, file_path
      end

      # Picks the extractor for a file: first by shebang, then by filename,
      # and finally the configured default extractor when the configuration
      # enables it.
      #
      # @param file_path [String]
      # @return [Class, nil] nil for binary files or when nothing matches
      def can_extract(file_path)
        # File.binary? / File.shebang are not core methods; they come from
        # the refinement activated at the top of this file.
        return if File.binary?(file_path)

        shebang = File.shebang(file_path)
        extractor = find_extractor_by_shebang(shebang) if shebang
        extractor ||= find_extractor_by_filename(file_path)
        extractor ||= default_extractor if ::CommentExtractor.configuration.use_default_extractor

        extractor
      end

      private

      # Whether +object+ is the Extractor class itself or a subclass of it.
      def extractor_class?(object)
        object.is_a?(Class) && object <= Extractor
      end

      # Keys for which a `find_extractor_by_<key>` method has been defined.
      def defined_extractor_finders
        @defined_extractor_finders ||= []
      end

      # Defines a public singleton method `find_extractor_by_<key>` for each
      # key, delegating to #find_extractor_by.
      def define_extractor_finder_by(*keys)
        defined_extractor_finders.concat(keys)

        keys.each do |key|
          define_singleton_method "find_extractor_by_#{key}" do |value|
            find_extractor_by(key, value)
          end
        end
      end

      # Registers every given extractor and returns the manager itself.
      def initialize_extractors!(new_extractors = default_extractors)
        new_extractors.each { |extractor| regist_extractor(extractor) }

        self
      end

      # Looks up an extractor by one schema key.
      #
      # For :filename and :shebang all extractor patterns are joined into
      # one regexp with one capture group per extractor, so a single match
      # identifies the extractor: the group whose text equals the whole
      # match gives the index into the parallel :values array.
      # NOTE(review): this relies on the individual patterns containing no
      # capture groups of their own.
      def find_extractor_by(key, value)
        case key
        when :filename, :shebang
          definition = extractor_definitions[key]
          return unless definition[:regexp] =~ value

          matched = Regexp.last_match
          index = matched.captures.rindex(matched[0])
          definition[:values][index]
        when :filetype
          extractor_definitions[:filetype][value]
        end
      end

      # Cached lookup tables; reset by #regist_extractor.
      def extractor_definitions
        @extractor_definitions ||= build_extractor_definitions
      end

      # Builds the lookup tables: a filetype => extractor hash under
      # :filetype, and for every other finder a combined regexp plus the
      # parallel list of extractors.
      def build_extractor_definitions
        definitions = Hash.new { |h, k| h[k] = { regexp: nil, values: [] } }

        finders = defined_extractor_finders.dup
        finders.delete(:filetype)
        definitions[:filetype] = build_filetype_extractor_definitions

        finders.each do |finder|
          regexp_keys = []
          values = []

          extractors.each do |name, value|
            # Resolve (and cache) lazily registered extractors; const_get
            # triggers the autoload set up in #regist_extractor.
            extractor = extractors[name] = value || Extractor.const_get(name)

            next if extractor.disabled?

            if schema = extractor.send(finder)
              # [review] - Maybe my optimization way is not better
              regexp_source = schema.is_a?(Regexp) ? schema.source : schema
              regexp_keys << "(#{regexp_source})"
              values << extractor
            end
          end

          unless values.empty?
            definitions[finder][:values] = values
            definitions[finder][:regexp] = Regexp.new(regexp_keys.join('|'))
          end
        end

        definitions
      end

      # Builds the filetype => extractor hash. An extractor may declare one
      # filetype or an array of them.
      # NOTE(review): the hash's default block stores and returns `[]` for
      # unknown filetypes, so a lookup miss yields an empty array rather
      # than nil; disabled extractors are not skipped here either.
      def build_filetype_extractor_definitions
        definitions = Hash.new { |h, k| h[k] = [] }

        extractors.each_with_object(definitions) do |(name, value), memo|
          extractor = extractors[name] = value || Extractor.const_get(name)
          filetypes = *extractor.filetype
          filetypes.each { |filetype| memo[filetype] = extractor }
        end
      end

      # Registered extractors keyed by name; values are extractor classes,
      # or nil until the autoloaded constant is resolved. The defaults are
      # registered on first access.
      def extractors
        return @extractors if @extractors

        @extractors = {}
        initialize_extractors!

        @extractors
      end

      # The extractor configured as the fallback.
      def default_extractor
        ::CommentExtractor.configuration.default_extractor
      end
    end

    # Defines find_extractor_by_shebang, find_extractor_by_filetype and
    # find_extractor_by_filename.
    define_extractor_finder_by(*Extractor::SCHAME_ACCESSOR_NAMES)
  end
end
|