comment_extractor 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +132 -0
  4. data/bin/comment_parser_debug +45 -0
  5. data/lib/comment_extractor/code_object/comment.rb +19 -0
  6. data/lib/comment_extractor/code_object.rb +12 -0
  7. data/lib/comment_extractor/code_objects.rb +46 -0
  8. data/lib/comment_extractor/configuration.rb +50 -0
  9. data/lib/comment_extractor/encoding.rb +40 -0
  10. data/lib/comment_extractor/extractor/c.rb +8 -0
  11. data/lib/comment_extractor/extractor/cc.rb +8 -0
  12. data/lib/comment_extractor/extractor/class.rb +8 -0
  13. data/lib/comment_extractor/extractor/clojure.rb +11 -0
  14. data/lib/comment_extractor/extractor/coffee.rb +13 -0
  15. data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
  16. data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
  17. data/lib/comment_extractor/extractor/cpp.rb +8 -0
  18. data/lib/comment_extractor/extractor/cs.rb +8 -0
  19. data/lib/comment_extractor/extractor/css.rb +8 -0
  20. data/lib/comment_extractor/extractor/cxx.rb +8 -0
  21. data/lib/comment_extractor/extractor/d.rb +9 -0
  22. data/lib/comment_extractor/extractor/erlang.rb +12 -0
  23. data/lib/comment_extractor/extractor/fortran.rb +11 -0
  24. data/lib/comment_extractor/extractor/go.rb +8 -0
  25. data/lib/comment_extractor/extractor/h.rb +8 -0
  26. data/lib/comment_extractor/extractor/haml.rb +49 -0
  27. data/lib/comment_extractor/extractor/haskell.rb +12 -0
  28. data/lib/comment_extractor/extractor/hpp.rb +8 -0
  29. data/lib/comment_extractor/extractor/html.rb +13 -0
  30. data/lib/comment_extractor/extractor/java.rb +8 -0
  31. data/lib/comment_extractor/extractor/java_script.rb +12 -0
  32. data/lib/comment_extractor/extractor/lisp.rb +11 -0
  33. data/lib/comment_extractor/extractor/lua.rb +12 -0
  34. data/lib/comment_extractor/extractor/m.rb +9 -0
  35. data/lib/comment_extractor/extractor/markdown.rb +7 -0
  36. data/lib/comment_extractor/extractor/mm.rb +8 -0
  37. data/lib/comment_extractor/extractor/perl.rb +12 -0
  38. data/lib/comment_extractor/extractor/php.rb +8 -0
  39. data/lib/comment_extractor/extractor/python.rb +13 -0
  40. data/lib/comment_extractor/extractor/ruby.rb +40 -0
  41. data/lib/comment_extractor/extractor/sass.rb +8 -0
  42. data/lib/comment_extractor/extractor/scala.rb +8 -0
  43. data/lib/comment_extractor/extractor/scss.rb +8 -0
  44. data/lib/comment_extractor/extractor/shell.rb +11 -0
  45. data/lib/comment_extractor/extractor/sqf.rb +8 -0
  46. data/lib/comment_extractor/extractor/sql.rb +12 -0
  47. data/lib/comment_extractor/extractor/sqs.rb +7 -0
  48. data/lib/comment_extractor/extractor/tex.rb +12 -0
  49. data/lib/comment_extractor/extractor/text.rb +10 -0
  50. data/lib/comment_extractor/extractor/yaml.rb +12 -0
  51. data/lib/comment_extractor/extractor.rb +96 -0
  52. data/lib/comment_extractor/extractor_manager.rb +158 -0
  53. data/lib/comment_extractor/file.rb +42 -0
  54. data/lib/comment_extractor/parser.rb +33 -0
  55. data/lib/comment_extractor/smart_string_scanner.rb +11 -0
  56. data/lib/comment_extractor/version.rb +4 -0
  57. data/lib/comment_extractor.rb +18 -0
  58. data/spec/assets/binary_file +0 -0
  59. data/spec/assets/shebang_file +3 -0
  60. data/spec/assets/source_code/c.c +158 -0
  61. data/spec/assets/source_code/cc.cc +24 -0
  62. data/spec/assets/source_code/class +0 -0
  63. data/spec/assets/source_code/clojure.clj +41 -0
  64. data/spec/assets/source_code/coffee.coffee +27 -0
  65. data/spec/assets/source_code/cpp.cpp +130 -0
  66. data/spec/assets/source_code/cs.cs +53 -0
  67. data/spec/assets/source_code/css.css +37 -0
  68. data/spec/assets/source_code/cxx +0 -0
  69. data/spec/assets/source_code/d.d +110 -0
  70. data/spec/assets/source_code/erlang.es +34 -0
  71. data/spec/assets/source_code/fortran.f +41 -0
  72. data/spec/assets/source_code/golang.go +61 -0
  73. data/spec/assets/source_code/h +0 -0
  74. data/spec/assets/source_code/haml.haml +26 -0
  75. data/spec/assets/source_code/haskell.hs +36 -0
  76. data/spec/assets/source_code/hpp +0 -0
  77. data/spec/assets/source_code/html.html +139 -0
  78. data/spec/assets/source_code/java.java +39 -0
  79. data/spec/assets/source_code/java_script.js +164 -0
  80. data/spec/assets/source_code/lisp.el +18 -0
  81. data/spec/assets/source_code/lua.lua +34 -0
  82. data/spec/assets/source_code/m +0 -0
  83. data/spec/assets/source_code/mm +0 -0
  84. data/spec/assets/source_code/perl.pl +36 -0
  85. data/spec/assets/source_code/php.php +31 -0
  86. data/spec/assets/source_code/python.py +139 -0
  87. data/spec/assets/source_code/ruby.rb +36 -0
  88. data/spec/assets/source_code/sass.sass +77 -0
  89. data/spec/assets/source_code/scala.scala +46 -0
  90. data/spec/assets/source_code/scss.scss +93 -0
  91. data/spec/assets/source_code/shell.sh +5 -0
  92. data/spec/assets/source_code/sqf +0 -0
  93. data/spec/assets/source_code/sql.sql +11 -0
  94. data/spec/assets/source_code/sqs +0 -0
  95. data/spec/assets/source_code/tex.tex +20 -0
  96. data/spec/assets/source_code/text.txt +15 -0
  97. data/spec/assets/source_code/vim +17 -0
  98. data/spec/assets/source_code/yaml.yml +44 -0
  99. data/spec/assets/stripper/children/children +0 -0
  100. data/spec/assets/stripper/children/children.c +0 -0
  101. data/spec/assets/stripper/children/children.js +0 -0
  102. data/spec/assets/stripper/children/children.o +0 -0
  103. data/spec/assets/stripper/children/children.rb +1 -0
  104. data/spec/assets/stripper/test +0 -0
  105. data/spec/assets/stripper/test.c +0 -0
  106. data/spec/assets/stripper/test.js +0 -0
  107. data/spec/assets/stripper/test.o +0 -0
  108. data/spec/assets/stripper/test.rb +1 -0
  109. data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
  110. data/spec/comment_extractor/code_object_spec.rb +18 -0
  111. data/spec/comment_extractor/code_objects_spec.rb +66 -0
  112. data/spec/comment_extractor/configuration_spec.rb +68 -0
  113. data/spec/comment_extractor/encoding_spec.rb +77 -0
  114. data/spec/comment_extractor/extractor/c_spec.rb +9 -0
  115. data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
  116. data/spec/comment_extractor/extractor/class_spec.rb +9 -0
  117. data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
  118. data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
  119. data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
  120. data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
  121. data/spec/comment_extractor/extractor/css_spec.rb +9 -0
  122. data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
  123. data/spec/comment_extractor/extractor/d_spec.rb +10 -0
  124. data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
  125. data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
  126. data/spec/comment_extractor/extractor/go_spec.rb +9 -0
  127. data/spec/comment_extractor/extractor/h_spec.rb +9 -0
  128. data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
  129. data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
  130. data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
  131. data/spec/comment_extractor/extractor/html_spec.rb +9 -0
  132. data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
  133. data/spec/comment_extractor/extractor/java_spec.rb +9 -0
  134. data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
  135. data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
  136. data/spec/comment_extractor/extractor/m_spec.rb +9 -0
  137. data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
  138. data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
  139. data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
  140. data/spec/comment_extractor/extractor/php_spec.rb +9 -0
  141. data/spec/comment_extractor/extractor/python_spec.rb +9 -0
  142. data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
  143. data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
  144. data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
  145. data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
  146. data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
  147. data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
  148. data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
  149. data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
  150. data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
  151. data/spec/comment_extractor/extractor/text_spec.rb +7 -0
  152. data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
  153. data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
  154. data/spec/comment_extractor/extractor_spec.rb +102 -0
  155. data/spec/comment_extractor/file_spec.rb +100 -0
  156. data/spec/comment_extractor/parser_spec.rb +67 -0
  157. data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
  158. data/spec/comment_extractor/version_spec.rb +8 -0
  159. data/spec/comment_extractor_spec.rb +15 -0
  160. data/spec/spec_helper.rb +22 -0
  161. data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
  162. data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
  163. data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
  164. data/spec/support/rspec/comment_extractor.rb +6 -0
  165. metadata +370 -0
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Erlang sources.
#
# Matches files by an "escript" shebang or an .erl/.es extension and
# registers a one-line comment rule that starts with one or more "%".
class CommentExtractor::Extractor::Erlang < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  shebang(/escript/)
  filename(/\.(?:erl|es)$/)
  filetype('erlang')

  define_default_bracket
  comment(start_with: /%+/)
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Fortran sources (.f, .f90, .F, .F90).
#
# Registers a single comment rule that starts with "!".
class CommentExtractor::Extractor::Fortran < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:f|f90|F|F90)$/)
  filetype('fortran')

  define_default_bracket
  comment(start_with: '!')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Go sources (.go).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Go < CommentExtractor::Extractor
  filename(/\.go$/)
  filetype('go')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for header files (.h).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::H < CommentExtractor::Extractor
  filename(/\.h$/)
  filetype('h')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,49 @@
1
+ # Copyright (c) 2006-2009 Hampton Catlin and Nathan Weizenbaum and @alpaca-tc
2
+
3
+ require 'haml'
4
+ require 'comment_extractor/extractor'
5
+
6
# Extractor definition for Haml templates (.haml).
#
# Instead of scanning text directly, the template is parsed with
# ::Haml::Parser and the resulting node tree is walked recursively.
class CommentExtractor::Extractor::Haml < CommentExtractor::Extractor
  filename(/\.haml$/)
  filetype('haml')

  # [review] - incomplete method
  #
  # Parses the content and inspects every top-level node (and, through
  # #detect_comment_from_node, all of their descendants).
  def scan
    options = ::Haml::Options.new
    tree = ::Haml::Parser.new(content, options).parse
    tree.children.each { |node| detect_comment_from_node(node) }
  end

  private

  # Dispatches on the node type, then recurses into the node's children.
  def detect_comment_from_node(node)
    type = node.type

    if type == :haml_comment
      identify_single_line_comment_from_comment_tag(node)
    elsif type == :tag || type == :script
      identify_single_line_comment_from_tag(node)
    end

    node.children.each { |child| detect_comment_from_node(child) }
  end

  # Looks for a trailing "# ..." in the node's :text and then in its :value,
  # recording a comment for each non-empty hit.
  def identify_single_line_comment_from_tag(node)
    # [todo] - refactoring
    %i[text value].each do |key|
      found = trailing_comment_in(node.value[key])
      next if found.nil? || found.empty?

      code_objects << build_comment(node.line, found)
    end
  end

  # Records the text of a Haml comment node unless it is empty.
  def identify_single_line_comment_from_comment_tag(node)
    text = node.value[:text]
    code_objects << build_comment(node.line, text) unless text.empty?
  end

  # Returns whatever follows the last "#" up to the end of a line,
  # or nil when the pattern does not match (including when text is nil).
  def trailing_comment_in(text)
    matched = /#(?<comment>[^#]*)$/.match(text)
    matched && matched[:comment]
  end
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Haskell sources (.hs).
#
# Registers a one-line rule starting with "--" and a block rule
# delimited by "{-" and "-}".
class CommentExtractor::Extractor::Haskell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.hs$/)
  filetype('haskell')

  define_default_bracket
  comment(start_with: '--')
  comment(start_with: '{-', end_with: '-}', type: BLOCK_COMMENT)
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for C++ header files (.hpp).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Hpp < CommentExtractor::Extractor
  filename(/\.hpp$/)
  filetype('hpp')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,13 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for HTML documents (.html).
#
# Registers <script>...</script> elements as an ignore pattern and
# "<!--" ... "-->" as a block comment rule.
class CommentExtractor::Extractor::Html < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.html$/)
  filetype('html')

  define_default_bracket
  define_ignore_patterns(/<\s*script[^>]*>.*?<\/script\s*>/mi)

  comment(start_with: '<!--', end_with: '-->', type: BLOCK_COMMENT)
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Java sources (.java).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Java < CommentExtractor::Extractor
  filename(/\.java$/)
  filetype('java')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for JavaScript sources.
#
# Matches .js files as well as scripts whose shebang ends in "js" or "node".
# Comment rules come from the SlashExtractor concern; a regexp bracket is
# declared in addition.
class CommentExtractor::Extractor::JavaScript < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  filename(/\.js$/)
  filetype('javascript')
  shebang(/.*(?:js|node)$/)

  define_regexp_bracket
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Emacs Lisp sources (.el).
#
# Declares the double quote as a bracket and registers a comment rule
# that starts with one or more ";".
class CommentExtractor::Extractor::Lisp < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.el$/)
  filetype('lisp')

  define_bracket('"')
  comment(start_with: /;+/)
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Lua sources (.lua).
#
# The block rule ("--[[" ... "]]") is declared before the one-line "--"
# rule; the declaration order is kept exactly as in the original.
class CommentExtractor::Extractor::Lua < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.lua$/)
  filetype('lua')

  define_default_bracket
  comment(start_with: '--\[\[', end_with: /\s*\]\]/, type: BLOCK_COMMENT)
  comment(start_with: '--')
end
@@ -0,0 +1,9 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Objective-C
#
# Extractor definition for .m files; the comment rules themselves are
# supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::M < CommentExtractor::Extractor
  filename(/\.m$/)
  filetype('m')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,7 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Markdown documents (.md, .markdown, .mkd).
#
# The class is marked as disabled and declares no comment rules.
class CommentExtractor::Extractor::Markdown < CommentExtractor::Extractor
  disable!
  filename(/\.(?:md|markdown|mkd)$/)
  filetype('markdown')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for .mm files.
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Mm < CommentExtractor::Extractor
  filename(/\.mm$/)
  filetype('mm')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Perl sources (.pm, .pl).
#
# Registers a block rule running from a line starting with "=pod" to a
# line starting with "=cut", followed by a one-line "#" rule.
class CommentExtractor::Extractor::Perl < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:pm|pl)$/)
  filetype('perl')

  define_default_bracket
  comment(start_with: /^=pod/, end_with: /^=cut/, type: BLOCK_COMMENT)
  comment(start_with: '#')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for PHP sources (.php).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Php < CommentExtractor::Extractor
  filename(/\.php$/)
  filetype('php')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,13 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Python sources (.py).
#
# Registers triple-quoted strings as block comments and "#" as the
# one-line comment marker.
class CommentExtractor::Extractor::Python < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.py$/)
  filetype('py')

  define_default_bracket
  comment(start_with: '"""', end_with: '"""', type: BLOCK_COMMENT)
  # The original declared the double-quoted rule twice; the second
  # declaration now covers the single-quoted triple delimiter instead.
  # NOTE(review): confirm this interacts correctly with the brackets set up
  # by define_default_bracket.
  comment(start_with: "'''", end_with: "'''", type: BLOCK_COMMENT)
  comment(start_with: '#')
end
@@ -0,0 +1,40 @@
1
+ require 'rdoc'
2
+ require 'comment_extractor/extractor'
3
+
4
# Extractor definition for Ruby sources.
#
# Matches Rakefile, Gemfile, Guardfile, config.ru, *.rb and *.gemspec by
# name, or any script whose shebang ends in "ruby". Comments are located by
# tokenizing the content with RDoc::RubyLex rather than by text scanning.
class CommentExtractor::Extractor::Ruby < CommentExtractor::Extractor
  include CommentExtractor::CodeObject::Comment::Type

  # The dot of "config.ru" is escaped so that only a literal "." matches
  # (the unescaped form also accepted names such as "configXru").
  filename(/(?:Rakefile|Gemfile|\.rb|\.gemspec|Guardfile|config\.ru)$/)
  filetype('ruby')
  shebang(/.*ruby$/)

  # Options object passed to the tokenizer; it only answers #tab_width.
  class Options < Hash
    def tab_width
      2
    end
  end

  # Walks the token stream and records every comment token.
  def scan
    RDoc::RubyLex.tokenize(content, Options.new).each do |token|
      case token
      when RDoc::RubyToken::TkRD_COMMENT # =begin ... =end
        # One comment object per line of the block; the first body line is
        # reported one line below the token's own line number.
        token.value.split("\n").each_with_index do |text, offset|
          add_comment(token.line_no + 1 + offset, text, type: BLOCK_COMMENT)
        end
      when RDoc::RubyToken::TkCOMMENT # # ...
        # Drop the leading "#" and at most one following whitespace character.
        text = token.value.sub(/^\s*#\s?/, '')
        add_comment(token.line_no, text, type: ONE_LINER_COMMENT)
      end
    end
  end

  private

  # Builds a comment object and appends it to the collected code objects.
  def add_comment(line, comment, **metadata)
    code_objects << build_comment(line, comment, **metadata)
  end
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Sass stylesheets (.sass).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Sass < CommentExtractor::Extractor
  filename(/\.sass$/)
  filetype('sass')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for Scala sources (.scala).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Scala < CommentExtractor::Extractor
  filename(/\.scala$/)
  filetype('scala')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for SCSS stylesheets (.scss).
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Scss < CommentExtractor::Extractor
  filename(/\.scss$/)
  filetype('scss')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for shell scripts.
#
# Matches *.zsh, *.bash, *.sh and names ending in zshrc/bashrc, and is
# registered under three filetypes: bash, sh and zsh.
class CommentExtractor::Extractor::Shell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/(?:\.(?:zsh|bash|sh)|zshrc|bashrc)$/)
  filetype(%w[bash sh zsh])

  define_default_bracket
  comment(start_with: '#')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for .sqf files.
#
# The comment rules themselves are supplied by the SlashExtractor concern.
class CommentExtractor::Extractor::Sqf < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  filename(/\.sqf$/)
  filetype('sqf')
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for SQL scripts (.sql).
#
# Registers a one-line rule starting with "--" and a block rule whose
# delimiters are given as the escaped patterns '/\*' and '\*/'.
class CommentExtractor::Extractor::Sql < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.sql$/)
  filetype('sql')

  define_default_bracket
  comment(start_with: '--')
  comment(start_with: '/\*', end_with: '\*/', type: BLOCK_COMMENT)
end
@@ -0,0 +1,7 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for .sqs files.
#
# The class is marked as disabled and declares no comment rules.
class CommentExtractor::Extractor::Sqs < CommentExtractor::Extractor
  disable!
  filename(/\.sqs$/)
  filetype('sqs')
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for TeX documents (.tex).
#
# A comment starts at a "%" that is not preceded by a backslash; the
# escaped form "\%" is additionally registered as an ignore pattern.
class CommentExtractor::Extractor::Tex < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.tex$/)
  filetype('tex')

  define_bracket('{')
  comment(start_with: /(?<!\\)%/)
  define_ignore_patterns(/\\%/)
end
@@ -0,0 +1,10 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for plain text files (.txt).
#
# Registers a single comment rule that starts with "#"; no brackets are
# declared.
class CommentExtractor::Extractor::Text < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.txt$/)
  filetype('text')

  comment(start_with: /#/)
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
3
# Extractor definition for YAML documents (.yml).
#
# Lines that contain no "#" at all are registered as an ignore pattern.
# Two comment rules follow: one for a "#" preceded only by whitespace on
# its line, and one for a whitespace-prefixed "#" at the end of a line.
class CommentExtractor::Extractor::Yaml < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.yml$/)
  filetype('yaml')

  define_ignore_patterns(/^\s*[^#]+$/)
  # Fixed: the original pattern was /^s*#/, which matched literal "s"
  # characters rather than leading whitespace before the "#".
  comment(start_with: /^\s*#/)
  comment(start_with: /\s#(?=[^#]*)$/)
end
@@ -0,0 +1,96 @@
1
+ require 'strscan'
2
+ require 'comment_extractor/code_object'
3
+ require 'comment_extractor/code_objects'
4
+ require 'comment_extractor/extractor/concerns/simple_extractor'
5
+ require 'comment_extractor/extractor/concerns/slash_extractor'
6
+ require 'comment_extractor/version'
7
+
8
module CommentExtractor
  # Base class of every language-specific extractor.
  #
  # Subclasses describe themselves through the class-level schema accessors
  # (shebang / filetype / filename) and implement #scan, which is expected to
  # push comment objects onto #code_objects.
  class Extractor
    class Error < RuntimeError; end
    class SyntaxDefinitionError < RuntimeError; end

    REGEXP = {
      BREAK: /(?:\r?\n|\r)/,
    }.freeze

    # Names of the class-level schema accessors. Frozen like REGEXP so the
    # shared list cannot be mutated by accident.
    SCHAME_ACCESSOR_NAMES = %i[shebang filetype filename].freeze

    attr_reader :content, :code_objects

    # True once .disable! has been called on this very class.
    def self.disabled?
      @status == :disable
    end

    # Marks this class as disabled.
    def self.disable!
      @status = :disable
    end

    # Defines, for each key, a combined class-level reader/writer:
    # called with a truthy argument it stores the value in .schema,
    # called without one (or with nil/false) it returns the stored value.
    def self.schema_accessor(*keys)
      keys.each do |key|
        define_singleton_method key do |value = nil|
          if value
            schema[key] = value
          else
            schema[key]
          end
        end
      end
    end
    schema_accessor(*SCHAME_ACCESSOR_NAMES)

    # Per-class storage behind the schema accessors. It lives in a class-level
    # instance variable, so each subclass gets its own hash.
    def self.schema
      @schema ||= {}
    end

    # content      - the text to scan.
    # code_objects - optional collection that receives the comments; a new
    #                CodeObjects is created when none is given.
    def initialize(content, code_objects = nil)
      @content = content
      @code_objects = code_objects || CodeObjects.new
    end

    # #extract_comments should return the CodeObjects collection, which
    # contains instances of CodeObject::Comment.
    # #scan runs at most once; later calls return the memoized collection.
    def extract_comments
      @extracted_comments ||= begin
        scan
        code_objects
      end
    end

    protected

    # Must be overridden by subclasses.
    def scan
      raise NotImplementedError, "You must implement #{self.class}##{__method__}"
    end

    # Lazily built scanner over the content.
    def scanner
      @scanner ||= build_scanner
    end

    def build_scanner
      StringScanner.new(@content)
    end

    # Builds a comment object for the given line, tagging it with the
    # extractor that produced it.
    def build_comment(line, comment, **metadata)
      metadata[:extractor] = self
      CodeObject::Comment.new(line: line, value: comment, **metadata)
    end

    private

    # Raises SyntaxDefinitionError with a bug-report template followed by the
    # scanned content. The gsub strips leading whitespace from every line of
    # the template (and, because \s also matches newlines, its blank lines).
    def raise_report
      content = "Content:\n#{@content}"

      raise SyntaxDefinitionError, <<-MSG.gsub(/^\s*/, '') + content
        Error occurred.
        Please report to <https://github.com/alpaca-tc/comment_extractor/issues>

        - - -

        CommentExtractor #{CommentExtractor::VERSION}

        Date: #{Time.now}
        Extractor: #{self.class}
      MSG
    end
  end
end
@@ -0,0 +1,158 @@
1
+ require 'comment_extractor'
2
+ require 'comment_extractor/extractor'
3
+ require 'comment_extractor/file'
4
+
5
+ using CommentExtractor::DetectableSchemeFile
6
+
7
module CommentExtractor
  # Registry of extractor classes. It resolves which extractor handles a file
  # by looking at the file's shebang, its name, or an explicit filetype.
  module ExtractorManager
    class << self
      # Names of the extractor classes registered when nothing else is given.
      def default_extractors
        %i[
          C Cc Class Clojure Coffee Cpp
          Cs Css Cxx D Erlang Fortran Go H Haml
          Haskell Hpp Html Java JavaScript Lisp
          Lua M Markdown Mm Perl Php Python
          Ruby Sass Scala Scss Shell Sqf
          Sql Sqs Tex Yaml
        ]
      end

      # Registers an extractor, given either as an Extractor subclass or as
      # the constant name (Symbol) of a class below Extractor.
      #
      # A class is stored as-is. For a name, only an autoload entry pointing
      # at comment_extractor/extractor/<underscored name> is set up and the
      # constant is resolved later, when the lookup tables are built.
      #
      # Fixed: the original tested `klass_or_symbol.is_a?(Extractor)`, which is
      # never true for a class object, so a class argument was passed on to
      # `autoload` as if it were a constant name.
      def regist_extractor(klass_or_symbol)
        # Any registration invalidates the cached lookup tables.
        @extractor_definitions = nil
        extractor = extractor_class?(klass_or_symbol) ? klass_or_symbol : nil
        extractors[:"#{klass_or_symbol}"] = extractor
        return if extractor

        file_path = "comment_extractor/extractor/#{underscored_name(klass_or_symbol)}"
        Extractor.autoload klass_or_symbol, file_path
      end

      # Returns the extractor responsible for file_path, or nil.
      #
      # Binary files are never extractable. Otherwise the shebang is tried
      # first, then the filename; the configured default extractor is used as
      # a last resort when the configuration enables it.
      def can_extract(file_path)
        return if File.binary?(file_path)

        shebang = File.shebang(file_path)
        extractor = shebang ? find_extractor_by_shebang(shebang) : nil
        extractor ||= find_extractor_by_filename(file_path)

        if ::CommentExtractor.configuration.use_default_extractor
          extractor ||= default_extractor
        end

        extractor
      end

      private

      # True when the argument is Extractor itself or one of its subclasses.
      def extractor_class?(candidate)
        candidate.is_a?(Class) && candidate <= Extractor
      end

      # Converts a constant name such as :JavaScript into "java_script".
      # Non-word characters are removed first, so the "::" and "-" handling
      # further down the chain cannot match anything; it is kept unchanged.
      def underscored_name(klass_or_symbol)
        "#{klass_or_symbol}".gsub(/\W/, '').gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          tr("-", "_").
          downcase
      end

      # Schema keys for which a find_extractor_by_<key> method was defined.
      def defined_extractor_finders
        @defined_extractor_finders ||= []
      end

      # Defines one singleton method find_extractor_by_<key> per key.
      def define_extractor_finder_by(*keys)
        defined_extractor_finders.concat(keys)

        keys.each do |key|
          define_singleton_method "find_extractor_by_#{key}" do |value|
            find_extractor_by(key, value)
          end
        end
      end

      # Registers every given extractor and returns the manager itself.
      def initialize_extractors!(new_extractors = default_extractors)
        new_extractors.each { |extractor| regist_extractor(extractor) }

        self
      end

      # Looks an extractor up by one schema key.
      #
      # For :filename and :shebang all schemas are merged into one regexp with
      # one capture group per extractor; the group whose text equals the
      # whole match identifies the extractor.
      # NOTE(review): this relies on the individual schemas containing no
      # capture groups of their own — confirm for custom extractors.
      # NOTE(review): the :filetype table is built with a default block, so an
      # unknown filetype yields [] here rather than nil — confirm intent.
      def find_extractor_by(key, value)
        case key
        when :filename, :shebang
          # Regexp optimization which can find value O(1)
          if extractor_definitions[key][:regexp] =~ value
            matched = Regexp.last_match
            index = matched.captures.rindex(matched[0])
            extractor_definitions[key][:values][index]
          end
        when :filetype
          extractor_definitions[:filetype][value]
        end
      end

      # Cached lookup tables; reset by #regist_extractor.
      def extractor_definitions
        @extractor_definitions ||= build_extractor_definitions
      end

      # Builds, per regexp-based finder, the combined regexp and the list of
      # extractors in capture-group order, plus the filetype table.
      def build_extractor_definitions
        definitions = Hash.new { |hash, key| hash[key] = { regexp: nil, values: [] } }

        finders = defined_extractor_finders - [:filetype]
        definitions[:filetype] = build_filetype_extractor_definitions

        finders.each do |finder|
          patterns = []
          matched_extractors = []

          extractors.each do |name, value|
            # Resolve (and remember) name-only registrations on first use.
            extractor = extractors[name] = value || Extractor.const_get(name)

            next if extractor.disabled?

            schema = extractor.send(finder)
            next unless schema

            # [review] - Maybe my optimization way is not better
            # Only the regexp source is reused, so regexp flags are not
            # carried into the combined pattern.
            source = schema.is_a?(Regexp) ? schema.source : schema
            patterns << "(#{source})"
            matched_extractors << extractor
          end

          next if matched_extractors.empty?

          definitions[finder][:values] = matched_extractors
          definitions[finder][:regexp] = Regexp.new(patterns.join('|'))
        end

        definitions
      end

      # Maps every declared filetype to its extractor. Disabled extractors
      # are not skipped here.
      def build_filetype_extractor_definitions
        definitions = Hash.new { |hash, key| hash[key] = [] }

        extractors.each_with_object(definitions) do |(name, value), table|
          extractor = extractors[name] = value || Extractor.const_get(name)
          Array(extractor.filetype).each { |filetype| table[filetype] = extractor }
        end
      end

      # Registered extractors keyed by name; filled with the defaults on
      # first access.
      def extractors
        return @extractors if @extractors

        @extractors = {}
        initialize_extractors!

        @extractors
      end

      # Extractor configured as the fallback.
      def default_extractor
        ::CommentExtractor.configuration.default_extractor
      end
    end

    # Defines find_extractor_by_shebang, find_extractor_by_filetype and
    # find_extractor_by_filename.
    define_extractor_finder_by(*Extractor::SCHAME_ACCESSOR_NAMES)
  end
end