comment_extractor 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +132 -0
- data/bin/comment_parser_debug +45 -0
- data/lib/comment_extractor/code_object/comment.rb +19 -0
- data/lib/comment_extractor/code_object.rb +12 -0
- data/lib/comment_extractor/code_objects.rb +46 -0
- data/lib/comment_extractor/configuration.rb +50 -0
- data/lib/comment_extractor/encoding.rb +40 -0
- data/lib/comment_extractor/extractor/c.rb +8 -0
- data/lib/comment_extractor/extractor/cc.rb +8 -0
- data/lib/comment_extractor/extractor/class.rb +8 -0
- data/lib/comment_extractor/extractor/clojure.rb +11 -0
- data/lib/comment_extractor/extractor/coffee.rb +13 -0
- data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
- data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
- data/lib/comment_extractor/extractor/cpp.rb +8 -0
- data/lib/comment_extractor/extractor/cs.rb +8 -0
- data/lib/comment_extractor/extractor/css.rb +8 -0
- data/lib/comment_extractor/extractor/cxx.rb +8 -0
- data/lib/comment_extractor/extractor/d.rb +9 -0
- data/lib/comment_extractor/extractor/erlang.rb +12 -0
- data/lib/comment_extractor/extractor/fortran.rb +11 -0
- data/lib/comment_extractor/extractor/go.rb +8 -0
- data/lib/comment_extractor/extractor/h.rb +8 -0
- data/lib/comment_extractor/extractor/haml.rb +49 -0
- data/lib/comment_extractor/extractor/haskell.rb +12 -0
- data/lib/comment_extractor/extractor/hpp.rb +8 -0
- data/lib/comment_extractor/extractor/html.rb +13 -0
- data/lib/comment_extractor/extractor/java.rb +8 -0
- data/lib/comment_extractor/extractor/java_script.rb +12 -0
- data/lib/comment_extractor/extractor/lisp.rb +11 -0
- data/lib/comment_extractor/extractor/lua.rb +12 -0
- data/lib/comment_extractor/extractor/m.rb +9 -0
- data/lib/comment_extractor/extractor/markdown.rb +7 -0
- data/lib/comment_extractor/extractor/mm.rb +8 -0
- data/lib/comment_extractor/extractor/perl.rb +12 -0
- data/lib/comment_extractor/extractor/php.rb +8 -0
- data/lib/comment_extractor/extractor/python.rb +13 -0
- data/lib/comment_extractor/extractor/ruby.rb +40 -0
- data/lib/comment_extractor/extractor/sass.rb +8 -0
- data/lib/comment_extractor/extractor/scala.rb +8 -0
- data/lib/comment_extractor/extractor/scss.rb +8 -0
- data/lib/comment_extractor/extractor/shell.rb +11 -0
- data/lib/comment_extractor/extractor/sqf.rb +8 -0
- data/lib/comment_extractor/extractor/sql.rb +12 -0
- data/lib/comment_extractor/extractor/sqs.rb +7 -0
- data/lib/comment_extractor/extractor/tex.rb +12 -0
- data/lib/comment_extractor/extractor/text.rb +10 -0
- data/lib/comment_extractor/extractor/yaml.rb +12 -0
- data/lib/comment_extractor/extractor.rb +96 -0
- data/lib/comment_extractor/extractor_manager.rb +158 -0
- data/lib/comment_extractor/file.rb +42 -0
- data/lib/comment_extractor/parser.rb +33 -0
- data/lib/comment_extractor/smart_string_scanner.rb +11 -0
- data/lib/comment_extractor/version.rb +4 -0
- data/lib/comment_extractor.rb +18 -0
- data/spec/assets/binary_file +0 -0
- data/spec/assets/shebang_file +3 -0
- data/spec/assets/source_code/c.c +158 -0
- data/spec/assets/source_code/cc.cc +24 -0
- data/spec/assets/source_code/class +0 -0
- data/spec/assets/source_code/clojure.clj +41 -0
- data/spec/assets/source_code/coffee.coffee +27 -0
- data/spec/assets/source_code/cpp.cpp +130 -0
- data/spec/assets/source_code/cs.cs +53 -0
- data/spec/assets/source_code/css.css +37 -0
- data/spec/assets/source_code/cxx +0 -0
- data/spec/assets/source_code/d.d +110 -0
- data/spec/assets/source_code/erlang.es +34 -0
- data/spec/assets/source_code/fortran.f +41 -0
- data/spec/assets/source_code/golang.go +61 -0
- data/spec/assets/source_code/h +0 -0
- data/spec/assets/source_code/haml.haml +26 -0
- data/spec/assets/source_code/haskell.hs +36 -0
- data/spec/assets/source_code/hpp +0 -0
- data/spec/assets/source_code/html.html +139 -0
- data/spec/assets/source_code/java.java +39 -0
- data/spec/assets/source_code/java_script.js +164 -0
- data/spec/assets/source_code/lisp.el +18 -0
- data/spec/assets/source_code/lua.lua +34 -0
- data/spec/assets/source_code/m +0 -0
- data/spec/assets/source_code/mm +0 -0
- data/spec/assets/source_code/perl.pl +36 -0
- data/spec/assets/source_code/php.php +31 -0
- data/spec/assets/source_code/python.py +139 -0
- data/spec/assets/source_code/ruby.rb +36 -0
- data/spec/assets/source_code/sass.sass +77 -0
- data/spec/assets/source_code/scala.scala +46 -0
- data/spec/assets/source_code/scss.scss +93 -0
- data/spec/assets/source_code/shell.sh +5 -0
- data/spec/assets/source_code/sqf +0 -0
- data/spec/assets/source_code/sql.sql +11 -0
- data/spec/assets/source_code/sqs +0 -0
- data/spec/assets/source_code/tex.tex +20 -0
- data/spec/assets/source_code/text.txt +15 -0
- data/spec/assets/source_code/vim +17 -0
- data/spec/assets/source_code/yaml.yml +44 -0
- data/spec/assets/stripper/children/children +0 -0
- data/spec/assets/stripper/children/children.c +0 -0
- data/spec/assets/stripper/children/children.js +0 -0
- data/spec/assets/stripper/children/children.o +0 -0
- data/spec/assets/stripper/children/children.rb +1 -0
- data/spec/assets/stripper/test +0 -0
- data/spec/assets/stripper/test.c +0 -0
- data/spec/assets/stripper/test.js +0 -0
- data/spec/assets/stripper/test.o +0 -0
- data/spec/assets/stripper/test.rb +1 -0
- data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
- data/spec/comment_extractor/code_object_spec.rb +18 -0
- data/spec/comment_extractor/code_objects_spec.rb +66 -0
- data/spec/comment_extractor/configuration_spec.rb +68 -0
- data/spec/comment_extractor/encoding_spec.rb +77 -0
- data/spec/comment_extractor/extractor/c_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
- data/spec/comment_extractor/extractor/class_spec.rb +9 -0
- data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
- data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/css_spec.rb +9 -0
- data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
- data/spec/comment_extractor/extractor/d_spec.rb +10 -0
- data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
- data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
- data/spec/comment_extractor/extractor/go_spec.rb +9 -0
- data/spec/comment_extractor/extractor/h_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
- data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/html_spec.rb +9 -0
- data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
- data/spec/comment_extractor/extractor/java_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
- data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
- data/spec/comment_extractor/extractor/m_spec.rb +9 -0
- data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
- data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
- data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
- data/spec/comment_extractor/extractor/php_spec.rb +9 -0
- data/spec/comment_extractor/extractor/python_spec.rb +9 -0
- data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
- data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
- data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
- data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
- data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
- data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
- data/spec/comment_extractor/extractor/text_spec.rb +7 -0
- data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
- data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
- data/spec/comment_extractor/extractor_spec.rb +102 -0
- data/spec/comment_extractor/file_spec.rb +100 -0
- data/spec/comment_extractor/parser_spec.rb +67 -0
- data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
- data/spec/comment_extractor/version_spec.rb +8 -0
- data/spec/comment_extractor_spec.rb +15 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
- data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
- data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
- data/spec/support/rspec/comment_extractor.rb +6 -0
- metadata +370 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Erlang sources.
#
# Registered for files whose shebang matches /escript/, whose name ends in
# `.erl` or `.es`, or whose filetype is 'erlang'. A comment starts at a run
# of one or more `%` characters.
class CommentExtractor::Extractor::Erlang < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  shebang(/escript/)
  filename(/\.(?:erl|es)$/)
  filetype('erlang')

  # NOTE(review): define_default_bracket lives in SimpleExtractor (not visible
  # here); presumably it registers quote pairs so markers inside string
  # literals are not treated as comments -- confirm against the concern.
  define_default_bracket
  comment(start_with: /%+/)
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Fortran sources.
#
# Registered for file names ending in `.f`, `.f90`, `.F` or `.F90` and for
# the 'fortran' filetype. A comment starts at `!`.
class CommentExtractor::Extractor::Fortran < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:f|f90|F|F90)$/)
  filetype('fortran')

  # NOTE(review): bracket semantics are defined by SimpleExtractor, which is
  # not visible here -- presumably string-literal handling; confirm.
  define_default_bracket
  comment(start_with: '!')
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright (c) 2006-2009 Hampton Catlin and Nathan Weizenbaum and @alpaca-tc
|
2
|
+
|
3
|
+
require 'haml'
|
4
|
+
require 'comment_extractor/extractor'
|
5
|
+
|
6
|
+
# Comment extractor for Haml templates.
#
# Instead of scanning text, it parses the template with Haml's own parser
# and walks the resulting node tree looking for comments.
class CommentExtractor::Extractor::Haml < CommentExtractor::Extractor
  filename(/\.haml$/)
  filetype('haml')

  # Parses #content with Haml and inspects every top-level node (and,
  # recursively, its descendants) for comments.
  #
  # NOTE(review): flagged by the original author as an incomplete method.
  def scan
    tree = ::Haml::Parser.new(content, ::Haml::Options.new).parse
    tree.children.each { |node| detect_comment_from_node(node) }
  end

  private

  # Dispatches on the node type, then recurses into the node's children.
  def detect_comment_from_node(node)
    case node.type
    when :haml_comment then identify_single_line_comment_from_comment_tag(node)
    when :tag, :script then identify_single_line_comment_from_tag(node)
    end

    node.children.each { |child| detect_comment_from_node(child) }
  end

  # Looks for a trailing `# ...` fragment (text after the last `#` on a line)
  # in the node's :text and then its :value entry, recording each non-empty
  # hit as a comment on the node's line.
  def identify_single_line_comment_from_tag(node)
    %i[text value].each do |key|
      found = /#(?<comment>[^#]*)$/.match(node.value[key])
      next unless found

      comment = found[:comment]
      code_objects << build_comment(node.line, comment) unless comment.empty?
    end
  end

  # Records the text of a Haml comment node, unless it is empty.
  def identify_single_line_comment_from_comment_tag(node)
    text = node.value[:text]
    code_objects << build_comment(node.line, text) unless text.empty?
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Haskell sources.
#
# Registered for file names ending in `.hs` and for the 'haskell' filetype.
# Recognises `--` comments and `{- ... -}` block comments.
class CommentExtractor::Extractor::Haskell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.hs$/)
  filetype('haskell')

  # NOTE(review): bracket semantics come from SimpleExtractor (not visible
  # here) -- presumably string-literal handling; confirm.
  define_default_bracket
  comment(start_with: '--')
  comment(start_with: '{-', end_with: '-}', type: BLOCK_COMMENT)
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for HTML documents.
#
# Registered for file names ending in `.html` and for the 'html' filetype.
# Recognises `<!-- ... -->` block comments.
class CommentExtractor::Extractor::Html < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.html$/)
  filetype('html')

  define_default_bracket
  # Whole <script>...</script> elements (case-insensitive, spanning lines) are
  # registered as an ignore pattern.
  # NOTE(review): presumably so that script bodies are skipped rather than
  # scanned for comments -- confirm in SimpleExtractor.
  define_ignore_patterns(/<\s*script[^>]*>.*?<\/script\s*>/mi)

  comment(start_with: '<!--', end_with: '-->', type: BLOCK_COMMENT)
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for JavaScript sources.
#
# Registered for file names ending in `.js`, for the 'javascript' filetype
# and for shebangs ending in `js` or `node`. The comment rules themselves are
# supplied by the SlashExtractor concern (defined elsewhere).
class CommentExtractor::Extractor::JavaScript < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  filename(/\.js$/)
  filetype('javascript')
  shebang(/.*(?:js|node)$/)

  # NOTE(review): presumably registers `/.../` regexp literals as brackets so
  # `//` inside a regexp is not read as a comment -- confirm in the concern.
  define_regexp_bracket
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Lisp sources.
#
# Registered for file names ending in `.el` and for the 'lisp' filetype.
# A comment starts at a run of one or more `;` characters.
class CommentExtractor::Extractor::Lisp < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.el$/)
  filetype('lisp')

  # Only the double quote is registered as a bracket for this language.
  define_bracket('"')
  comment(start_with: /;+/)
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Lua sources.
#
# Registered for file names ending in `.lua` and for the 'lua' filetype.
# Recognises `--[[ ... ]]` block comments and `--` comments.
class CommentExtractor::Extractor::Lua < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.lua$/)
  filetype('lua')

  define_default_bracket
  # The block rule is declared before the `--` rule; keep this order.
  # NOTE(review): presumably rules are tried in declaration order, so the
  # longer `--[[` opener must come first -- confirm in SimpleExtractor.
  comment(start_with: '--\[\[', end_with: /\s*\]\]/, type: BLOCK_COMMENT)
  comment(start_with: '--')
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Perl sources.
#
# Registered for file names ending in `.pm` or `.pl` and for the 'perl'
# filetype. Recognises `=pod` ... `=cut` blocks (both anchored at the start
# of a line) and `#` comments.
class CommentExtractor::Extractor::Perl < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:pm|pl)$/)
  filetype('perl')

  define_default_bracket
  comment(start_with: /^=pod/, end_with: /^=cut/, type: BLOCK_COMMENT)
  comment(start_with: '#')
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for Python sources.
#
# Registered for file names ending in `.py` and for the 'py' filetype.
# Recognises triple-quoted blocks in both quote styles (`"""` and `'''`)
# and `#` comments.
class CommentExtractor::Extractor::Python < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.py$/)
  filetype('py')

  define_default_bracket
  # The original declared the `"""` rule twice; the second declaration now
  # covers the single-quote form so `'''...'''` blocks are extracted too.
  comment(start_with: '"""', end_with: '"""', type: BLOCK_COMMENT)
  comment(start_with: "'''", end_with: "'''", type: BLOCK_COMMENT)
  comment(start_with: '#')
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rdoc'
|
2
|
+
require 'comment_extractor/extractor'
|
3
|
+
|
4
|
+
# Comment extractor for Ruby sources.
#
# Registered for Rakefile, Gemfile, Guardfile, config.ru, `.rb` and
# `.gemspec` file names, for the 'ruby' filetype and for shebangs ending in
# `ruby`. Tokenizes the content with RDoc's lexer and turns comment tokens
# into comment objects.
class CommentExtractor::Extractor::Ruby < CommentExtractor::Extractor
  include CommentExtractor::CodeObject::Comment::Type

  # The dot of `config.ru` is escaped: unescaped it matched any character
  # (e.g. a file named "configXru").
  filename(/(?:Rakefile|Gemfile|\.rb|\.gemspec|Guardfile|config\.ru)$/)
  filetype 'ruby'
  shebang(/.*ruby$/)

  # Options object handed to RDoc::RubyLex.tokenize; it only needs to answer
  # #tab_width.
  class Options < Hash
    def tab_width
      2
    end
  end

  # Walks the token stream and records every comment token.
  def scan
    tokens = RDoc::RubyLex.tokenize(content, Options.new)

    tokens.each do |token|
      case token
      when RDoc::RubyToken::TkRD_COMMENT # =begin ... =end
        # One comment object per line of the block; numbering starts on the
        # line after the token's own line.
        token.value.split("\n").each_with_index do |comment, index|
          line_no = token.line_no + 1 + index
          add_comment(line_no, comment, type: BLOCK_COMMENT)
        end
      when RDoc::RubyToken::TkCOMMENT # # ...
        # Drop the leading `#` and at most one following whitespace character.
        comment = token.value.sub(/^\s*#\s?/, '')
        add_comment(token.line_no, comment, type: ONE_LINER_COMMENT)
      end
    end
  end

  private

  # Builds a comment object and appends it to #code_objects.
  def add_comment(line, comment, **metadata)
    comment_object = build_comment(line, comment, **metadata)
    code_objects << comment_object
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for shell scripts.
#
# Registered for file names ending in `.zsh`, `.bash`, `.sh`, `zshrc` or
# `bashrc`, and for the 'bash', 'sh' and 'zsh' filetypes. A comment starts
# at `#`.
class CommentExtractor::Extractor::Shell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/(?:\.(?:zsh|bash|sh)|zshrc|bashrc)$/)
  filetype(%w[bash sh zsh])

  # NOTE(review): bracket semantics come from SimpleExtractor (not visible
  # here) -- presumably string-literal handling; confirm.
  define_default_bracket
  comment(start_with: '#')
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for SQL scripts.
#
# Registered for file names ending in `.sql` and for the 'sql' filetype.
# Recognises `--` comments and `/* ... */` block comments.
class CommentExtractor::Extractor::Sql < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.sql$/)
  filetype('sql')

  define_default_bracket
  comment(start_with: '--')
  # The delimiters are written as strings containing regexp escapes.
  # NOTE(review): presumably SimpleExtractor compiles them to regexps -- confirm.
  comment(start_with: '/\*', end_with: '\*/', type: BLOCK_COMMENT)
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for TeX sources.
#
# Registered for file names ending in `.tex` and for the 'tex' filetype.
# A comment starts at a `%` that is not preceded by a backslash.
class CommentExtractor::Extractor::Tex < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.tex$/)
  filetype('tex')

  define_bracket('{')
  comment(start_with: /(?<!\\)%/)
  # An escaped percent sign (`\%`) is registered as an ignore pattern.
  define_ignore_patterns(/\\%/)
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'comment_extractor/extractor'
|
2
|
+
|
3
|
+
# Comment extractor for YAML documents.
#
# Registered for file names ending in `.yml` and for the 'yaml' filetype.
class CommentExtractor::Extractor::Yaml < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.yml$/)
  filetype('yaml')

  # Lines that contain no `#` at all are registered as an ignore pattern.
  define_ignore_patterns(/^\s*[^#]+$/)

  # Full-line comment: optional indentation followed by `#`.
  # The original pattern was /^s*#/ (missing backslash), which matched a run
  # of literal "s" characters instead of whitespace.
  comment(start_with: /^\s*#/)

  # NOTE(review): the lookahead `(?=[^#]*)` can match the empty string, so
  # this only matches a whitespace-preceded `#` sitting at the very end of a
  # line. `(?=[^#]*$)` may have been intended -- confirm how SimpleExtractor
  # applies start_with before changing it.
  comment(start_with: /\s#(?=[^#]*)$/)
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
require 'comment_extractor/code_object'
|
3
|
+
require 'comment_extractor/code_objects'
|
4
|
+
require 'comment_extractor/extractor/concerns/simple_extractor'
|
5
|
+
require 'comment_extractor/extractor/concerns/slash_extractor'
|
6
|
+
require 'comment_extractor/version'
|
7
|
+
|
8
|
+
module CommentExtractor
  # Base class of every language-specific comment extractor.
  #
  # A subclass describes the files it handles with the class-level schema
  # accessors (+shebang+, +filetype+, +filename+) and implements #scan, which
  # is expected to append comment objects to #code_objects.
  class Extractor
    class Error < RuntimeError; end
    class SyntaxDefinitionError < RuntimeError; end

    REGEXP = {
      BREAK: /(?:\r?\n|\r)/,
    }.freeze

    # Names of the generated schema accessors. The constant name (including
    # its spelling) is kept as-is because it is part of the class's public
    # constants. Frozen so it cannot be mutated by accident.
    SCHAME_ACCESSOR_NAMES = %i[shebang filetype filename].freeze

    attr_reader :content, :code_objects

    # True once .disable! has been called on this very class.
    def self.disabled?
      @status == :disable
    end

    # Marks this class as disabled.
    def self.disable!
      @status = :disable
    end

    # Per-class storage behind the schema accessors.
    def self.schema
      @schema ||= {}
    end

    # Defines, for each key, a class-level method that stores the value in
    # .schema when called with a truthy argument and reads it back when
    # called without one.
    def self.schema_accessor(*keys)
      keys.each do |key|
        define_singleton_method(key) do |value = nil|
          if value
            schema[key] = value
          else
            schema[key]
          end
        end
      end
    end
    schema_accessor(*SCHAME_ACCESSOR_NAMES)

    # content      - the text to scan.
    # code_objects - collection that receives the extracted comments; a new
    #                CodeObjects is created when none is given.
    def initialize(content, code_objects = nil)
      @content = content
      @code_objects = code_objects || CodeObjects.new
    end

    # Runs #scan once and returns #code_objects; the result is memoized.
    # #scan is expected to fill #code_objects with CodeObject::Comment
    # instances.
    def extract_comments
      @extracted_comments ||= begin
        scan
        code_objects
      end
    end

    protected

    # Must be implemented by subclasses.
    def scan
      raise NotImplementedError, "You must implement #{self.class}##{__method__}"
    end

    # Lazily built StringScanner over the content.
    def scanner
      @scanner ||= build_scanner
    end

    def build_scanner
      StringScanner.new(@content)
    end

    # Builds a comment object for the given line and text; this extractor is
    # attached to it as metadata[:extractor].
    def build_comment(line, comment, **metadata)
      metadata[:extractor] = self
      CodeObject::Comment.new(line: line, value: comment, **metadata)
    end

    private

    # Raises SyntaxDefinitionError with a bug-report template followed by
    # the scanned content.
    def raise_report
      # gsub(/^\s*/, '') strips the heredoc indentation; because \s also
      # matches newlines, it removes the blank lines as well.
      report = <<-MSG.gsub(/^\s*/, '')
        Error occurred.
        Please report to <https://github.com/alpaca-tc/comment_extractor/issues>

        - - -

        CommentExtractor #{CommentExtractor::VERSION}

        Date: #{Time.now}
        Extractor: #{self.class}
      MSG

      raise SyntaxDefinitionError, report + "Content:\n#{@content}"
    end
  end
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'comment_extractor'
|
2
|
+
require 'comment_extractor/extractor'
|
3
|
+
require 'comment_extractor/file'
|
4
|
+
|
5
|
+
using CommentExtractor::DetectableSchemeFile
|
6
|
+
|
7
|
+
module CommentExtractor
  # Registry that maps a file (by shebang, file name or filetype) to the
  # extractor class that handles it.
  module ExtractorManager
    class << self
      # Names of the extractors registered by default. Each name is resolved
      # lazily to a constant under CommentExtractor::Extractor.
      def default_extractors
        %i[
          C Cc Class Clojure Coffee Cpp
          Cs Css Cxx D Erlang Fortran Go H Haml
          Haskell Hpp Html Java JavaScript Lisp
          Lua M Markdown Mm Perl Php Python
          Ruby Sass Scala Scss Shell Sqf
          Sql Sqs Tex Yaml
        ]
      end

      # Registers an extractor.
      #
      # klass_or_symbol - either an extractor class, stored directly, or a
      #                   constant name (e.g. :JavaScript), which is
      #                   autoloaded from
      #                   "comment_extractor/extractor/<snake_case_name>".
      #
      # Resets the cached lookup tables. Returns nil.
      def regist_extractor(klass_or_symbol)
        @extractor_definitions = nil
        extractor = extractor_object?(klass_or_symbol) ? klass_or_symbol : nil
        extractors[:"#{klass_or_symbol}"] = extractor
        return if extractor

        file_path = "comment_extractor/extractor/#{extractor_file_name(klass_or_symbol)}"
        Extractor.autoload klass_or_symbol, file_path
      end

      # Returns the extractor for the file at file_path, or nil.
      #
      # Binary files get nil. Otherwise the shebang is tried first, then the
      # file name, and finally the configured default extractor when
      # configuration.use_default_extractor is set.
      #
      # NOTE(review): File.binary? and File.shebang are not core File
      # methods; presumably they come from the DetectableSchemeFile
      # refinement activated at the top of this file -- confirm.
      def can_extract(file_path)
        return if File.binary?(file_path)

        shebang = File.shebang(file_path)
        extractor = find_extractor_by_shebang(shebang) if shebang
        extractor ||= find_extractor_by_filename(file_path)
        extractor ||= default_extractor if ::CommentExtractor.configuration.use_default_extractor

        extractor
      end

      private

      # True for an Extractor subclass. Extractor instances are accepted as
      # well, only to keep the original check's behaviour for them.
      # The original test was just `is_a?(Extractor)`, which is never true
      # for a class, so registering a class never worked.
      def extractor_object?(candidate)
        candidate.is_a?(Extractor) ||
          (candidate.is_a?(Class) && candidate <= Extractor)
      end

      # Snake-cases a constant name for use as a file name
      # (e.g. :JavaScript -> "java_script"). Non-word characters are dropped
      # first, which is why no `::` or `-` handling follows.
      def extractor_file_name(name)
        name.to_s.gsub(/\W/, '').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          downcase
      end

      # Keys for which a find_extractor_by_<key> method has been defined.
      def defined_extractor_finders
        @defined_extractor_finders ||= []
      end

      # Defines a find_extractor_by_<key>(value) singleton method per key.
      def define_extractor_finder_by(*keys)
        defined_extractor_finders.concat(keys)

        keys.each do |key|
          define_singleton_method "find_extractor_by_#{key}" do |value|
            find_extractor_by(key, value)
          end
        end
      end

      # Registers every extractor in new_extractors and returns self.
      def initialize_extractors!(new_extractors = default_extractors)
        new_extractors.each { |extractor| regist_extractor(extractor) }

        self
      end

      # Looks an extractor up by :filename, :shebang or :filetype.
      # Returns nil when nothing matches.
      def find_extractor_by(key, value)
        case key
        when :filename, :shebang
          # All patterns for the key are joined into one alternation with one
          # capture group per extractor; the group equal to the whole match
          # tells which extractor matched.
          definition = extractor_definitions[key]
          regexp = definition[:regexp]
          matched = regexp && regexp.match(value)
          return unless matched

          index = matched.captures.rindex(matched[0])
          index && definition[:values][index]
        when :filetype
          extractor_definitions[:filetype][value]
        end
      end

      # Cached lookup tables; rebuilt after every regist_extractor call.
      def extractor_definitions
        @extractor_definitions ||= build_extractor_definitions
      end

      # Builds the lookup tables:
      #   :filetype           => { filetype => extractor }
      #   :filename, :shebang => { regexp: combined Regexp, values: [extractor, ...] }
      def build_extractor_definitions
        definitions = Hash.new { |h, k| h[k] = { regexp: nil, values: [] } }

        finders = defined_extractor_finders.dup
        finders.delete(:filetype)
        definitions[:filetype] = build_filetype_extractor_definitions

        finders.each do |finder|
          regexp_keys = []
          values = []

          extractors.each do |name, value|
            extractor = resolve_extractor(name, value)

            next if extractor.disabled?

            schema = extractor.send(finder)
            next unless schema

            # [review] - Maybe my optimization way is not better
            regexp_source = schema.is_a?(Regexp) ? schema.source : schema
            regexp_keys << "(#{regexp_source})"
            values << extractor
          end

          next if values.empty?

          definitions[finder][:values] = values
          definitions[finder][:regexp] = Regexp.new(regexp_keys.join('|'))
        end

        definitions
      end

      # Maps every declared filetype to its extractor.
      #
      # A plain Hash is used so an unknown filetype yields nil, like the
      # other finders (the original auto-vivifying hash returned and cached
      # an empty Array). Disabled extractors are skipped, as they are for
      # the filename and shebang tables.
      def build_filetype_extractor_definitions
        extractors.each_with_object({}) do |(name, value), memo|
          extractor = resolve_extractor(name, value)
          next if extractor.disabled?

          Array(extractor.filetype).each { |filetype| memo[filetype] = extractor }
        end
      end

      # Returns the extractor for a registry entry, resolving (and storing)
      # the constant under Extractor when only the name was registered.
      def resolve_extractor(name, value)
        extractors[name] = value || Extractor.const_get(name)
      end

      # Registry of name => extractor (nil until resolved); populated with
      # the default extractors on first use.
      def extractors
        return @extractors if @extractors

        # Assigned before registering so regist_extractor sees the hash and
        # does not recurse back into initialization.
        @extractors = {}
        initialize_extractors!

        @extractors
      end

      def default_extractor
        ::CommentExtractor.configuration.default_extractor
      end
    end

    # Defines find_extractor_by_shebang, find_extractor_by_filetype and
    # find_extractor_by_filename.
    define_extractor_finder_by(*Extractor::SCHAME_ACCESSOR_NAMES)
  end
end
|