comment_extractor 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +132 -0
  4. data/bin/comment_parser_debug +45 -0
  5. data/lib/comment_extractor/code_object/comment.rb +19 -0
  6. data/lib/comment_extractor/code_object.rb +12 -0
  7. data/lib/comment_extractor/code_objects.rb +46 -0
  8. data/lib/comment_extractor/configuration.rb +50 -0
  9. data/lib/comment_extractor/encoding.rb +40 -0
  10. data/lib/comment_extractor/extractor/c.rb +8 -0
  11. data/lib/comment_extractor/extractor/cc.rb +8 -0
  12. data/lib/comment_extractor/extractor/class.rb +8 -0
  13. data/lib/comment_extractor/extractor/clojure.rb +11 -0
  14. data/lib/comment_extractor/extractor/coffee.rb +13 -0
  15. data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
  16. data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
  17. data/lib/comment_extractor/extractor/cpp.rb +8 -0
  18. data/lib/comment_extractor/extractor/cs.rb +8 -0
  19. data/lib/comment_extractor/extractor/css.rb +8 -0
  20. data/lib/comment_extractor/extractor/cxx.rb +8 -0
  21. data/lib/comment_extractor/extractor/d.rb +9 -0
  22. data/lib/comment_extractor/extractor/erlang.rb +12 -0
  23. data/lib/comment_extractor/extractor/fortran.rb +11 -0
  24. data/lib/comment_extractor/extractor/go.rb +8 -0
  25. data/lib/comment_extractor/extractor/h.rb +8 -0
  26. data/lib/comment_extractor/extractor/haml.rb +49 -0
  27. data/lib/comment_extractor/extractor/haskell.rb +12 -0
  28. data/lib/comment_extractor/extractor/hpp.rb +8 -0
  29. data/lib/comment_extractor/extractor/html.rb +13 -0
  30. data/lib/comment_extractor/extractor/java.rb +8 -0
  31. data/lib/comment_extractor/extractor/java_script.rb +12 -0
  32. data/lib/comment_extractor/extractor/lisp.rb +11 -0
  33. data/lib/comment_extractor/extractor/lua.rb +12 -0
  34. data/lib/comment_extractor/extractor/m.rb +9 -0
  35. data/lib/comment_extractor/extractor/markdown.rb +7 -0
  36. data/lib/comment_extractor/extractor/mm.rb +8 -0
  37. data/lib/comment_extractor/extractor/perl.rb +12 -0
  38. data/lib/comment_extractor/extractor/php.rb +8 -0
  39. data/lib/comment_extractor/extractor/python.rb +13 -0
  40. data/lib/comment_extractor/extractor/ruby.rb +40 -0
  41. data/lib/comment_extractor/extractor/sass.rb +8 -0
  42. data/lib/comment_extractor/extractor/scala.rb +8 -0
  43. data/lib/comment_extractor/extractor/scss.rb +8 -0
  44. data/lib/comment_extractor/extractor/shell.rb +11 -0
  45. data/lib/comment_extractor/extractor/sqf.rb +8 -0
  46. data/lib/comment_extractor/extractor/sql.rb +12 -0
  47. data/lib/comment_extractor/extractor/sqs.rb +7 -0
  48. data/lib/comment_extractor/extractor/tex.rb +12 -0
  49. data/lib/comment_extractor/extractor/text.rb +10 -0
  50. data/lib/comment_extractor/extractor/yaml.rb +12 -0
  51. data/lib/comment_extractor/extractor.rb +96 -0
  52. data/lib/comment_extractor/extractor_manager.rb +158 -0
  53. data/lib/comment_extractor/file.rb +42 -0
  54. data/lib/comment_extractor/parser.rb +33 -0
  55. data/lib/comment_extractor/smart_string_scanner.rb +11 -0
  56. data/lib/comment_extractor/version.rb +4 -0
  57. data/lib/comment_extractor.rb +18 -0
  58. data/spec/assets/binary_file +0 -0
  59. data/spec/assets/shebang_file +3 -0
  60. data/spec/assets/source_code/c.c +158 -0
  61. data/spec/assets/source_code/cc.cc +24 -0
  62. data/spec/assets/source_code/class +0 -0
  63. data/spec/assets/source_code/clojure.clj +41 -0
  64. data/spec/assets/source_code/coffee.coffee +27 -0
  65. data/spec/assets/source_code/cpp.cpp +130 -0
  66. data/spec/assets/source_code/cs.cs +53 -0
  67. data/spec/assets/source_code/css.css +37 -0
  68. data/spec/assets/source_code/cxx +0 -0
  69. data/spec/assets/source_code/d.d +110 -0
  70. data/spec/assets/source_code/erlang.es +34 -0
  71. data/spec/assets/source_code/fortran.f +41 -0
  72. data/spec/assets/source_code/golang.go +61 -0
  73. data/spec/assets/source_code/h +0 -0
  74. data/spec/assets/source_code/haml.haml +26 -0
  75. data/spec/assets/source_code/haskell.hs +36 -0
  76. data/spec/assets/source_code/hpp +0 -0
  77. data/spec/assets/source_code/html.html +139 -0
  78. data/spec/assets/source_code/java.java +39 -0
  79. data/spec/assets/source_code/java_script.js +164 -0
  80. data/spec/assets/source_code/lisp.el +18 -0
  81. data/spec/assets/source_code/lua.lua +34 -0
  82. data/spec/assets/source_code/m +0 -0
  83. data/spec/assets/source_code/mm +0 -0
  84. data/spec/assets/source_code/perl.pl +36 -0
  85. data/spec/assets/source_code/php.php +31 -0
  86. data/spec/assets/source_code/python.py +139 -0
  87. data/spec/assets/source_code/ruby.rb +36 -0
  88. data/spec/assets/source_code/sass.sass +77 -0
  89. data/spec/assets/source_code/scala.scala +46 -0
  90. data/spec/assets/source_code/scss.scss +93 -0
  91. data/spec/assets/source_code/shell.sh +5 -0
  92. data/spec/assets/source_code/sqf +0 -0
  93. data/spec/assets/source_code/sql.sql +11 -0
  94. data/spec/assets/source_code/sqs +0 -0
  95. data/spec/assets/source_code/tex.tex +20 -0
  96. data/spec/assets/source_code/text.txt +15 -0
  97. data/spec/assets/source_code/vim +17 -0
  98. data/spec/assets/source_code/yaml.yml +44 -0
  99. data/spec/assets/stripper/children/children +0 -0
  100. data/spec/assets/stripper/children/children.c +0 -0
  101. data/spec/assets/stripper/children/children.js +0 -0
  102. data/spec/assets/stripper/children/children.o +0 -0
  103. data/spec/assets/stripper/children/children.rb +1 -0
  104. data/spec/assets/stripper/test +0 -0
  105. data/spec/assets/stripper/test.c +0 -0
  106. data/spec/assets/stripper/test.js +0 -0
  107. data/spec/assets/stripper/test.o +0 -0
  108. data/spec/assets/stripper/test.rb +1 -0
  109. data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
  110. data/spec/comment_extractor/code_object_spec.rb +18 -0
  111. data/spec/comment_extractor/code_objects_spec.rb +66 -0
  112. data/spec/comment_extractor/configuration_spec.rb +68 -0
  113. data/spec/comment_extractor/encoding_spec.rb +77 -0
  114. data/spec/comment_extractor/extractor/c_spec.rb +9 -0
  115. data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
  116. data/spec/comment_extractor/extractor/class_spec.rb +9 -0
  117. data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
  118. data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
  119. data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
  120. data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
  121. data/spec/comment_extractor/extractor/css_spec.rb +9 -0
  122. data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
  123. data/spec/comment_extractor/extractor/d_spec.rb +10 -0
  124. data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
  125. data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
  126. data/spec/comment_extractor/extractor/go_spec.rb +9 -0
  127. data/spec/comment_extractor/extractor/h_spec.rb +9 -0
  128. data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
  129. data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
  130. data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
  131. data/spec/comment_extractor/extractor/html_spec.rb +9 -0
  132. data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
  133. data/spec/comment_extractor/extractor/java_spec.rb +9 -0
  134. data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
  135. data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
  136. data/spec/comment_extractor/extractor/m_spec.rb +9 -0
  137. data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
  138. data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
  139. data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
  140. data/spec/comment_extractor/extractor/php_spec.rb +9 -0
  141. data/spec/comment_extractor/extractor/python_spec.rb +9 -0
  142. data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
  143. data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
  144. data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
  145. data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
  146. data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
  147. data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
  148. data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
  149. data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
  150. data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
  151. data/spec/comment_extractor/extractor/text_spec.rb +7 -0
  152. data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
  153. data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
  154. data/spec/comment_extractor/extractor_spec.rb +102 -0
  155. data/spec/comment_extractor/file_spec.rb +100 -0
  156. data/spec/comment_extractor/parser_spec.rb +67 -0
  157. data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
  158. data/spec/comment_extractor/version_spec.rb +8 -0
  159. data/spec/comment_extractor_spec.rb +15 -0
  160. data/spec/spec_helper.rb +22 -0
  161. data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
  162. data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
  163. data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
  164. data/spec/support/rspec/comment_extractor.rb +6 -0
  165. metadata +370 -0
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Erlang sources.
#
# Selected by an +escript+ shebang or by a +.erl+ / +.es+ file name, and
# registers a run of one or more +%+ characters as the comment opener.
class CommentExtractor::Extractor::Erlang < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  shebang(/escript/)
  filename(/\.(?:erl|es)$/)
  filetype('erlang')

  define_default_bracket
  comment(start_with: /%+/)
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Fortran sources.
#
# Selected by the +.f+, +.f90+, +.F+ and +.F90+ file extensions and registers
# +!+ as the comment opener.
class CommentExtractor::Extractor::Fortran < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:f|f90|F|F90)$/)
  filetype('fortran')

  define_default_bracket
  comment(start_with: '!')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Go sources (+.go+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Go < CommentExtractor::Extractor
  filename(/\.go$/)
  filetype('go')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for +.h+ header files.
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::H < CommentExtractor::Extractor
  filename(/\.h$/)
  filetype('h')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,49 @@
1
+ # Copyright (c) 2006-2009 Hampton Catlin and Nathan Weizenbaum and @alpaca-tc
2
+
3
+ require 'haml'
4
+ require 'comment_extractor/extractor'
5
+
# Extractor definition for Haml templates (+.haml+ files).
#
# Instead of scanning text directly it parses the template with Haml's own
# parser and walks the resulting node tree looking for comments.
class CommentExtractor::Extractor::Haml < CommentExtractor::Extractor
  filename(/\.haml$/)
  filetype('haml')

  # [review] - incompleted method
  #
  # Parses the content with default Haml options and inspects every node
  # below the root.
  def scan
    root = ::Haml::Parser.new(content, ::Haml::Options.new).parse
    root.children.each { |node| detect_comment_from_node(node) }
  end

  private

  # Records the comments carried by +node+ and then recurses into its
  # children (depth first).
  def detect_comment_from_node(node)
    kind = node.type

    if kind == :haml_comment
      identify_single_line_comment_from_comment_tag(node)
    elsif kind == :tag || kind == :script
      identify_single_line_comment_from_tag(node)
    end

    node.children.each { |child| detect_comment_from_node(child) }
  end

  # Looks for a trailing "# ..." fragment in the node's :text and :value
  # entries (in that order) and records each non-empty one.
  def identify_single_line_comment_from_tag(node)
    # [todo] - refactoring
    %i[text value].each do |key|
      matched = /#(?<comment>[^#]*)$/.match(node.value[key])
      next unless matched

      comment = matched[:comment]
      code_objects << build_comment(node.line, comment) unless comment.empty?
    end
  end

  # Records the text of a Haml comment node unless that text is empty.
  def identify_single_line_comment_from_comment_tag(node)
    text = node.value[:text]
    code_objects << build_comment(node.line, text) unless text.empty?
  end
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Haskell sources (+.hs+ files).
#
# Registers +--+ as a comment opener and +{-+ ... +-}+ as a block comment.
class CommentExtractor::Extractor::Haskell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.hs$/)
  filetype('haskell')

  define_default_bracket
  comment(start_with: '--')
  comment(start_with: '{-', end_with: '-}', type: BLOCK_COMMENT)
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for +.hpp+ header files.
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Hpp < CommentExtractor::Extractor
  filename(/\.hpp$/)
  filetype('hpp')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,13 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for HTML documents (+.html+ files).
#
# Registers +<!--+ ... +-->+ as a block comment and declares whole
# <script>...</script> elements as an ignore pattern.
class CommentExtractor::Extractor::Html < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.html$/)
  filetype('html')

  define_default_bracket
  define_ignore_patterns(/<\s*script[^>]*>.*?<\/script\s*>/mi)

  comment(start_with: '<!--', end_with: '-->', type: BLOCK_COMMENT)
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Java sources (+.java+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Java < CommentExtractor::Extractor
  filename(/\.java$/)
  filetype('java')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for JavaScript sources.
#
# Selected by the +.js+ extension or by a shebang line ending in +js+ or
# +node+. Mixes in both extractor concerns and additionally enables the
# regexp bracket definition.
class CommentExtractor::Extractor::JavaScript < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  filename(/\.js$/)
  filetype('javascript')
  shebang(/.*(?:js|node)$/)

  define_regexp_bracket
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Lisp sources (+.el+ files).
#
# Declares the double quote as a bracket and registers a run of one or more
# +;+ characters as the comment opener.
class CommentExtractor::Extractor::Lisp < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.el$/)
  filetype('lisp')

  define_bracket('"')
  comment(start_with: /;+/)
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Lua sources (+.lua+ files).
#
# The block form (+--[[+ ... +]]+) is registered before the plain +--+ form;
# the declaration order of the original definition is preserved.
class CommentExtractor::Extractor::Lua < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.lua$/)
  filetype('lua')

  define_default_bracket
  comment(start_with: '--\[\[', end_with: /\s*\]\]/, type: BLOCK_COMMENT)
  comment(start_with: '--')
end
@@ -0,0 +1,9 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Objective-C sources (+.m+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::M < CommentExtractor::Extractor
  filename(/\.m$/)
  filetype('m')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,7 @@
1
+ require 'comment_extractor/extractor'
2
+
# Placeholder definition for Markdown files (+.md+, +.markdown+, +.mkd+).
#
# The extractor is marked as disabled and defines no scanning behaviour.
class CommentExtractor::Extractor::Markdown < CommentExtractor::Extractor
  disable!
  filename(/\.(?:md|markdown|mkd)$/)
  filetype('markdown')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for +.mm+ source files.
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Mm < CommentExtractor::Extractor
  filename(/\.mm$/)
  filetype('mm')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Perl sources (+.pm+ and +.pl+ files).
#
# Registers a block comment that opens at a line starting with +=pod+ and
# closes at a line starting with +=cut+, followed by +#+ as a comment opener.
class CommentExtractor::Extractor::Perl < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.(?:pm|pl)$/)
  filetype('perl')

  define_default_bracket
  comment(start_with: /^=pod/, end_with: /^=cut/, type: BLOCK_COMMENT)
  comment(start_with: '#')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for PHP sources (+.php+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Php < CommentExtractor::Extractor
  filename(/\.php$/)
  filetype('php')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,13 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Python sources (+.py+ files).
#
# Registers both triple-quoted string forms as block comments, followed by
# +#+ as a comment opener.
class CommentExtractor::Extractor::Python < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.py$/)
  filetype('py')

  define_default_bracket
  comment(start_with: '"""', end_with: '"""', type: BLOCK_COMMENT)
  # The original registered the double-quoted form twice; Python's other
  # triple-quoted delimiter is three single quotes.
  comment(start_with: "'''", end_with: "'''", type: BLOCK_COMMENT)
  comment(start_with: '#')
end
@@ -0,0 +1,40 @@
1
+ require 'rdoc'
2
+ require 'comment_extractor/extractor'
3
+
# Extractor definition for Ruby sources.
#
# Selected by common Ruby file names/extensions or by a shebang line ending in
# +ruby+. Comments are found by tokenizing the content with RDoc's lexer.
class CommentExtractor::Extractor::Ruby < CommentExtractor::Extractor
  include CommentExtractor::CodeObject::Comment::Type

  # The dot of "config.ru" is escaped so that only a literal dot matches
  # (the unescaped form also accepted names such as "configXru").
  filename(/(?:Rakefile|Gemfile|\.rb|\.gemspec|Guardfile|config\.ru)$/)
  filetype('ruby')
  shebang(/.*ruby$/)

  # Options object handed to RDoc::RubyLex.tokenize; the only thing it adds
  # to a plain Hash is #tab_width.
  class Options < Hash
    def tab_width
      2
    end
  end

  # Tokenizes the content and records every comment token.
  #
  # An =begin ... =end token is split into lines and each line is recorded as
  # its own BLOCK_COMMENT, numbered from the line after the token's line_no.
  # A "#" comment token is recorded as a ONE_LINER_COMMENT with the leading
  # "#" (and at most one following whitespace character) removed.
  def scan
    tokens = RDoc::RubyLex.tokenize(content, Options.new)

    tokens.each do |token|
      case token
      when RDoc::RubyToken::TkRD_COMMENT # =begin ... =end
        token.value.split("\n").each_with_index do |comment, index|
          line_no = token.line_no + 1 + index
          add_comment(line_no, comment, type: BLOCK_COMMENT)
        end
      when RDoc::RubyToken::TkCOMMENT # # ...
        comment = token.value.sub(/^\s*#\s?/, '')
        add_comment(token.line_no, comment, type: ONE_LINER_COMMENT)
      end
    end
  end

  private

  # Builds a comment object for +line+ and appends it to #code_objects.
  def add_comment(line, comment, **metadata)
    code_objects << build_comment(line, comment, **metadata)
  end
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Sass stylesheets (+.sass+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Sass < CommentExtractor::Extractor
  filename(/\.sass$/)
  filetype('sass')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for Scala sources (+.scala+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Scala < CommentExtractor::Extractor
  filename(/\.scala$/)
  filetype('scala')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for SCSS stylesheets (+.scss+ files).
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Scss < CommentExtractor::Extractor
  filename(/\.scss$/)
  filetype('scss')

  include CommentExtractor::Extractor::Concerns::SlashExtractor
end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for shell scripts.
#
# Selected by the +.zsh+, +.bash+ and +.sh+ extensions or by names ending in
# +zshrc+ / +bashrc+. It answers to three filetypes and registers +#+ as the
# comment opener.
class CommentExtractor::Extractor::Shell < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/(?:\.(?:zsh|bash|sh)|zshrc|bashrc)$/)
  filetype(%w[bash sh zsh])

  define_default_bracket
  comment(start_with: '#')
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for +.sqf+ script files.
#
# The class only declares how it is selected; its scanning behaviour comes
# from the included SlashExtractor concern.
class CommentExtractor::Extractor::Sqf < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SlashExtractor

  filename(/\.sqf$/)
  filetype('sqf')
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for SQL scripts (+.sql+ files).
#
# Registers +--+ as a comment opener and +/*+ ... +*/+ as a block comment.
class CommentExtractor::Extractor::Sql < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.sql$/)
  filetype('sql')

  define_default_bracket
  comment(start_with: '--')
  comment(start_with: '/\*', end_with: '\*/', type: BLOCK_COMMENT)
end
@@ -0,0 +1,7 @@
1
+ require 'comment_extractor/extractor'
2
+
# Placeholder definition for +.sqs+ files.
#
# The extractor is marked as disabled and defines no scanning behaviour.
class CommentExtractor::Extractor::Sqs < CommentExtractor::Extractor
  disable!
  filename(/\.sqs$/)
  filetype('sqs')
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for TeX documents (+.tex+ files).
#
# Registers a +%+ that is not preceded by a backslash as the comment opener
# and declares the escaped form (a backslash followed by +%+) as an ignore
# pattern.
class CommentExtractor::Extractor::Tex < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.tex$/)
  filetype('tex')

  define_bracket('{')
  comment(start_with: /(?<!\\)%/)
  define_ignore_patterns(/\\%/)
end
@@ -0,0 +1,10 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for plain text files (+.txt+ files).
#
# Registers +#+ as the comment opener; no brackets are defined.
class CommentExtractor::Extractor::Text < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  filename(/\.txt$/)
  filetype('text')

  comment(start_with: /#/)
end
@@ -0,0 +1,12 @@
1
+ require 'comment_extractor/extractor'
2
+
# Extractor definition for YAML documents.
#
# Lines that contain no +#+ at all are declared as an ignore pattern; a +#+
# at the start of a line (optionally indented) or after whitespace is
# registered as a comment opener.
class CommentExtractor::Extractor::Yaml < CommentExtractor::Extractor
  include CommentExtractor::Extractor::Concerns::SimpleExtractor

  # Both common YAML extensions are accepted (.yml and .yaml).
  filename(/\.ya?ml$/)
  filetype('yaml')

  define_ignore_patterns(/^\s*[^#]+$/)
  # "\s" (whitespace) rather than a literal "s": the original pattern
  # /^s*#/ could not match a comment line that is indented with spaces.
  comment(start_with: /^\s*#/)
  # NOTE(review): the "$" sits outside the lookahead, so this only matches a
  # "#" that ends the line; /\s#(?=[^#]*$)/ may have been intended. Left
  # unchanged until confirmed against SimpleExtractor and the specs.
  comment(start_with: /\s#(?=[^#]*)$/)
end
@@ -0,0 +1,96 @@
1
+ require 'strscan'
2
+ require 'comment_extractor/code_object'
3
+ require 'comment_extractor/code_objects'
4
+ require 'comment_extractor/extractor/concerns/simple_extractor'
5
+ require 'comment_extractor/extractor/concerns/slash_extractor'
6
+ require 'comment_extractor/version'
7
+
8
+ module CommentExtractor
9
+ class Extractor
10
+ class Error < RuntimeError; end
11
+ class SyntaxDefinitionError < RuntimeError; end
12
+
13
+ REGEXP = {
14
+ BREAK: /(?:\r?\n|\r)/,
15
+ }.freeze
16
+ SCHAME_ACCESSOR_NAMES = %i[shebang filetype filename]
17
+
18
+ attr_reader :content, :code_objects
19
+
20
+ def self.disabled?
21
+ @status == :disable
22
+ end
23
+
24
+ def self.disable!
25
+ @status = :disable
26
+ end
27
+
28
+ def self.schema_accessor(*keys)
29
+ keys.each do |key|
30
+ define_singleton_method key do |value = nil|
31
+ if value
32
+ self.schema[key] = value
33
+ else
34
+ self.schema[key]
35
+ end
36
+ end
37
+ end
38
+ end
39
+ schema_accessor *SCHAME_ACCESSOR_NAMES
40
+
41
+ def self.schema
42
+ @schema ||= {}
43
+ end
44
+
45
+ def initialize(content, code_objects = nil)
46
+ @content = content
47
+ @code_objects = code_objects || CodeObjects.new
48
+ end
49
+
50
+ # #extract_comments should retrun CodeObjects contains instance
51
+ # of CodeObject::Comment
52
+ def extract_comments
53
+ @extracted_comments ||= begin
54
+ scan
55
+ code_objects
56
+ end
57
+ end
58
+
59
+ protected
60
+
61
+ def scan
62
+ raise NotImplementedError, "You must implement #{self.class}##{__method__}"
63
+ end
64
+
65
+ def scanner
66
+ @scanner ||= build_scanner
67
+ end
68
+
69
+ def build_scanner
70
+ StringScanner.new(@content)
71
+ end
72
+
73
+ def build_comment(line, comment, **metadata)
74
+ metadata[:extractor] = self
75
+ CodeObject::Comment.new(line: line, value: comment, **metadata)
76
+ end
77
+
78
+ private
79
+
80
+ def raise_report
81
+ content = "Content:\n#{@content}"
82
+
83
+ raise SyntaxDefinitionError, <<-MSG.gsub(/^\s*/, '') + content
84
+ Error occurred.
85
+ Please report to <https://github.com/alpaca-tc/comment_extractor/issues>
86
+
87
+ - - -
88
+
89
+ CommentExtractor #{CommentExtractor::VERSION}
90
+
91
+ Date: #{Time.now}
92
+ Extractor: #{self.class}
93
+ MSG
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,158 @@
1
+ require 'comment_extractor'
2
+ require 'comment_extractor/extractor'
3
+ require 'comment_extractor/file'
4
+
5
+ using CommentExtractor::DetectableSchemeFile
6
+
module CommentExtractor
  # Registry of extractor classes and the lookup logic that picks one for a
  # file by shebang, file name or filetype.
  module ExtractorManager
    class << self
      # Names of the extractors registered when the registry is first used.
      def default_extractors
        %i[
          C Cc Class Clojure Coffee Cpp
          Cs Css Cxx D Erlang Fortran Go H Haml
          Haskell Hpp Html Java JavaScript Lisp
          Lua M Markdown Mm Perl Php Python
          Ruby Sass Scala Scss Shell Sqf
          Sql Sqs Tex Yaml
        ]
      end

      # Registers an extractor and invalidates the cached lookup tables.
      #
      # klass_or_symbol - either an Extractor subclass, which is stored
      #                   directly, or a constant name (e.g. :JavaScript),
      #                   which is set up to autoload from
      #                   "comment_extractor/extractor/<underscored name>".
      #
      # The original check was `is_a?(Extractor)`, which is never true for a
      # class, so a class argument fell through to `autoload` with a
      # non-symbol.
      def regist_extractor(klass_or_symbol)
        @extractor_definitions = nil
        extractor = extractor_class?(klass_or_symbol) ? klass_or_symbol : nil
        extractors[:"#{klass_or_symbol}"] = extractor
        return if extractor

        file_path = "comment_extractor/extractor/#{underscore(klass_or_symbol)}"
        Extractor.autoload klass_or_symbol, file_path
      end

      # Returns the extractor for file_path, or nil for a binary file.
      #
      # The shebang is tried first, then the file name; when neither matches
      # and the configuration enables it, the configured default extractor is
      # returned.
      def can_extract(file_path)
        return if File.binary?(file_path)

        shebang = File.shebang(file_path)
        extractor = find_extractor_by_shebang(shebang) if shebang
        extractor ||= find_extractor_by_filename(file_path)

        if ::CommentExtractor.configuration.use_default_extractor
          extractor ||= default_extractor
        end

        extractor
      end

      private

      # True when +candidate+ is Extractor itself or one of its subclasses.
      def extractor_class?(candidate)
        candidate.is_a?(Class) && candidate <= Extractor ? true : false
      end

      # Converts a constant name such as :JavaScript into "java_script".
      # Non-word characters are dropped before the conversion.
      def underscore(name)
        "#{name}".gsub(/\W/, '').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          downcase
      end

      def defined_extractor_finders
        @defined_extractor_finders ||= []
      end

      # Defines a public find_extractor_by_<key> singleton method per key and
      # remembers the keys for build_extractor_definitions.
      def define_extractor_finder_by(*keys)
        defined_extractor_finders.concat(keys)

        keys.each do |key|
          define_singleton_method "find_extractor_by_#{key}" do |value|
            find_extractor_by(key, value)
          end
        end
      end

      def initialize_extractors!(new_extractors = default_extractors)
        new_extractors.each do |extractor|
          self.regist_extractor(extractor)
        end

        self
      end

      # Looks up an extractor for +value+.
      #
      # For :filename and :shebang a single combined regexp is used: every
      # schema is one top-level capture group, so the group whose text equals
      # the whole match identifies the extractor at the same position.
      def find_extractor_by(key, value)
        case key
        when :filename, :shebang
          # Regexp optimization which can find value O(1)
          if extractor_definitions[key][:regexp] =~ value
            matched = Regexp.last_match
            index = matched.captures.rindex(matched[0])
            extractor_definitions[key][:values][index]
          end
        when :filetype
          extractor_definitions[:filetype][value]
        end
      end

      def extractor_definitions
        @extractor_definitions ||= build_extractor_definitions
      end

      # Builds the lookup tables: one { regexp:, values: } entry per
      # regexp-based finder plus the filetype table.
      def build_extractor_definitions
        definitions = Hash.new { |h, k| h[k] = { regexp: nil, values: [] } }

        finders = defined_extractor_finders.dup
        finders.delete(:filetype)
        definitions[:filetype] = build_filetype_extractor_definitions

        finders.each do |finder|
          regexp_keys = []
          values = []

          extractors.each do |name, value|
            # Resolve (and cache) the class; const_get triggers the autoload.
            extractor = extractors[name] = value || Extractor.const_get(name)

            next if extractor.disabled?

            if schema = extractor.send(finder)
              # [review] - Maybe my optimization way is not better
              # Only the pattern source is kept, so regexp flags on a schema
              # are not carried into the combined regexp.
              regexp_source = schema.is_a?(Regexp) ? schema.source : schema
              regexp_keys << "(#{regexp_source})"
              values << extractor
            end
          end

          unless values.empty?
            definitions[finder][:values] = values
            definitions[finder][:regexp] = Regexp.new(regexp_keys.join('|'))
          end
        end

        definitions
      end

      # Maps every declared filetype to its extractor class.
      #
      # NOTE(review): the default block makes a lookup of an unknown filetype
      # return (and store) an empty array rather than nil, and disabled
      # extractors are not skipped here as they are for the regexp finders.
      # Both are left as-is because callers outside this file may rely on it.
      def build_filetype_extractor_definitions
        definitions = Hash.new { |h, k| h[k] = [] }

        extractors.each_with_object(definitions) do |(name, value), memo|
          extractor = extractors[name] = value || Extractor.const_get(name)
          filetypes = *extractor.filetype
          filetypes.each { |filetype| memo[filetype] = extractor }
        end
      end

      # Registered extractors keyed by name; populated with the defaults on
      # first access. A nil value means "not loaded yet".
      def extractors
        return @extractors if @extractors
        @extractors = {}
        initialize_extractors!

        @extractors
      end

      def default_extractor
        ::CommentExtractor.configuration.default_extractor
      end
    end

    # define :find_extractor_by_shebang, :find_extractor_by_filename
    define_extractor_finder_by *Extractor::SCHAME_ACCESSOR_NAMES
  end
end