dependabot-linguist 0.0.1 → 0.212.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (223) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +25 -3
  3. data/Gemfile +0 -9
  4. data/Gemfile.lock +162 -5
  5. data/LICENSE +674 -1
  6. data/{LICENSE.Nonstandard → LICENSE.dependabot-core} +0 -0
  7. data/Makefile +8 -1
  8. data/README.md +39 -5
  9. data/demo_script.rb +50 -0
  10. data/dependabot-linguist.gemspec +16 -5
  11. data/devlog.md +186 -0
  12. data/exe/dependabot-linguist +145 -0
  13. data/lib/dependabot/linguist/dependabot_file_validator.rb +190 -0
  14. data/lib/dependabot/linguist/dependabot_patch.rb +8 -0
  15. data/lib/dependabot/linguist/file_fetchers/base.rb +48 -0
  16. data/lib/dependabot/linguist/file_fetchers/git_submodules.rb +63 -0
  17. data/lib/dependabot/linguist/file_fetchers/go_modules.rb +41 -0
  18. data/lib/dependabot/linguist/language.rb +62 -0
  19. data/lib/dependabot/linguist/languages_to_ecosystems/contexts.rb +234 -0
  20. data/lib/dependabot/linguist/languages_to_ecosystems/contexts_applied.rb +36 -0
  21. data/lib/dependabot/linguist/languages_to_ecosystems/languages.yaml +7950 -0
  22. data/lib/dependabot/linguist/languages_to_ecosystems/main.rb +55 -0
  23. data/lib/dependabot/linguist/languages_to_ecosystems/manager_ecosystem_maps.rb +154 -0
  24. data/lib/dependabot/linguist/languages_to_patch.txt +37 -0
  25. data/lib/dependabot/linguist/linguist_patch.rb +6 -0
  26. data/lib/dependabot/linguist/repository.rb +232 -0
  27. data/lib/dependabot/linguist/version.rb +1 -1
  28. data/lib/dependabot/linguist.rb +5 -2
  29. data/smoke-test/README.md +58 -0
  30. data/smoke-test/bundler/Gemfile +9 -0
  31. data/smoke-test/bundler/Gemfile.lock +47 -0
  32. data/smoke-test/bundler/dependabot-all-updates-test-staging.gemspec +10 -0
  33. data/smoke-test/cargo/Cargo.lock +90 -0
  34. data/smoke-test/cargo/Cargo.toml +8 -0
  35. data/smoke-test/composer/composer.json +5 -0
  36. data/smoke-test/composer/composer.lock +72 -0
  37. data/smoke-test/composer/vendor/autoload.php +7 -0
  38. data/smoke-test/composer/vendor/composer/ClassLoader.php +445 -0
  39. data/smoke-test/composer/vendor/composer/LICENSE +21 -0
  40. data/smoke-test/composer/vendor/composer/autoload_classmap.php +13 -0
  41. data/smoke-test/composer/vendor/composer/autoload_namespaces.php +9 -0
  42. data/smoke-test/composer/vendor/composer/autoload_psr4.php +9 -0
  43. data/smoke-test/composer/vendor/composer/autoload_real.php +55 -0
  44. data/smoke-test/composer/vendor/composer/autoload_static.php +23 -0
  45. data/smoke-test/composer/vendor/composer/installed.json +57 -0
  46. data/smoke-test/composer/vendor/phpmailer/phpmailer/.gitignore +3 -0
  47. data/smoke-test/composer/vendor/phpmailer/phpmailer/LICENSE +504 -0
  48. data/smoke-test/composer/vendor/phpmailer/phpmailer/README.md +112 -0
  49. data/smoke-test/composer/vendor/phpmailer/phpmailer/changelog.md +530 -0
  50. data/smoke-test/composer/vendor/phpmailer/phpmailer/class.phpmailer.php +2758 -0
  51. data/smoke-test/composer/vendor/phpmailer/phpmailer/class.pop3.php +417 -0
  52. data/smoke-test/composer/vendor/phpmailer/phpmailer/class.smtp.php +1075 -0
  53. data/smoke-test/composer/vendor/phpmailer/phpmailer/composer.json +32 -0
  54. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/Callback_function_notes.txt +17 -0
  55. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/DomainKeys_notes.txt +55 -0
  56. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/Note_for_SMTP_debugging.txt +23 -0
  57. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/extending.html +148 -0
  58. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/faq.html +67 -0
  59. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/generatedocs.sh +4 -0
  60. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/pop3_article.txt +39 -0
  61. data/smoke-test/composer/vendor/phpmailer/phpmailer/docs/use_gmail.txt +44 -0
  62. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/contents.html +20 -0
  63. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/images/phpmailer.gif +0 -0
  64. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/images/phpmailer_mini.gif +0 -0
  65. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/index.html +50 -0
  66. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_db_smtp_basic.php +58 -0
  67. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_mail_advanced.php +30 -0
  68. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_mail_basic.php +41 -0
  69. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_pop_before_smtp_advanced.php +39 -0
  70. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_pop_before_smtp_basic.php +49 -0
  71. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_sendmail_advanced.php +33 -0
  72. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_sendmail_basic.php +43 -0
  73. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_advanced.php +42 -0
  74. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_advanced_no_auth.php +36 -0
  75. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_basic.php +58 -0
  76. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_basic_no_auth.php +53 -0
  77. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_gmail_advanced.php +42 -0
  78. data/smoke-test/composer/vendor/phpmailer/phpmailer/examples/test_smtp_gmail_basic.php +59 -0
  79. data/smoke-test/composer/vendor/phpmailer/phpmailer/extras/class.html2text.inc +489 -0
  80. data/smoke-test/composer/vendor/phpmailer/phpmailer/extras/htmlfilter.php +861 -0
  81. data/smoke-test/composer/vendor/phpmailer/phpmailer/extras/ntlm_sasl_client.php +185 -0
  82. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ar.php +26 -0
  83. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-br.php +25 -0
  84. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ca.php +25 -0
  85. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ch.php +25 -0
  86. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-cz.php +24 -0
  87. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-de.php +24 -0
  88. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-dk.php +25 -0
  89. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-es.php +25 -0
  90. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-et.php +25 -0
  91. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-fi.php +26 -0
  92. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-fo.php +26 -0
  93. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-fr.php +24 -0
  94. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-hu.php +24 -0
  95. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-it.php +26 -0
  96. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ja.php +25 -0
  97. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-nl.php +24 -0
  98. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-no.php +24 -0
  99. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-pl.php +24 -0
  100. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ro.php +26 -0
  101. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-ru.php +24 -0
  102. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-se.php +25 -0
  103. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-sk.php +25 -0
  104. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-tr.php +26 -0
  105. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-zh.php +25 -0
  106. data/smoke-test/composer/vendor/phpmailer/phpmailer/language/phpmailer.lang-zh_cn.php +25 -0
  107. data/smoke-test/composer/vendor/phpmailer/phpmailer/test/contents.html +10 -0
  108. data/smoke-test/composer/vendor/phpmailer/phpmailer/test/phpmailerTest.php +1084 -0
  109. data/smoke-test/composer/vendor/phpmailer/phpmailer/test/test.png +0 -0
  110. data/smoke-test/composer/vendor/phpmailer/phpmailer/test/test_callback.php +84 -0
  111. data/smoke-test/composer/vendor/phpmailer/phpmailer/test/testemail.php +48 -0
  112. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/LGPLv3.txt +165 -0
  113. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/contents.html +14 -0
  114. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/images/aikido.gif +0 -0
  115. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/images/bkgrnd.gif +0 -0
  116. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/images/phpmailer.gif +0 -0
  117. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/index.php +427 -0
  118. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/clipboard.swf +0 -0
  119. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushBash.js +59 -0
  120. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushCSharp.js +64 -0
  121. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushCpp.js +99 -0
  122. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushCss.js +93 -0
  123. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushDelphi.js +57 -0
  124. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushDiff.js +43 -0
  125. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushGroovy.js +69 -0
  126. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushJScript.js +51 -0
  127. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushJava.js +55 -0
  128. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushPerl.js +74 -0
  129. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushPhp.js +91 -0
  130. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushPlain.js +35 -0
  131. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushPython.js +56 -0
  132. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushRuby.js +57 -0
  133. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushScala.js +53 -0
  134. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushSql.js +68 -0
  135. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushVb.js +58 -0
  136. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shBrushXml.js +71 -0
  137. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shCore.js +30 -0
  138. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/scripts/shLegacy.js +30 -0
  139. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/src/shCore.js +1949 -0
  140. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/src/shLegacy.js +172 -0
  141. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/help.png +0 -0
  142. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/magnifier.png +0 -0
  143. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/page_white_code.png +0 -0
  144. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/page_white_copy.png +0 -0
  145. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/printer.png +0 -0
  146. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shCore.css +321 -0
  147. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeDefault.css +191 -0
  148. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeDjango.css +193 -0
  149. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeEmacs.css +192 -0
  150. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeFadeToGrey.css +193 -0
  151. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeMidnight.css +192 -0
  152. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/shThemeRDark.css +192 -0
  153. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/styles/wrapping.png +0 -0
  154. data/smoke-test/composer/vendor/phpmailer/phpmailer/test_script/test.html +46 -0
  155. data/smoke-test/dependabot-file/no-config/.github/TARGET.yaml +14 -0
  156. data/smoke-test/dependabot-file/no-config/bundler/Gemfile +9 -0
  157. data/smoke-test/dependabot-file/no-config/bundler/Gemfile.lock +47 -0
  158. data/smoke-test/dependabot-file/no-config/bundler/dependabot-all-updates-test-staging.gemspec +10 -0
  159. data/smoke-test/dependabot-file/no-config/cargo/Cargo.lock +90 -0
  160. data/smoke-test/dependabot-file/no-config/cargo/Cargo.toml +8 -0
  161. data/smoke-test/dependabot-file/no-config/composer/composer.json +5 -0
  162. data/smoke-test/dependabot-file/no-config/composer/composer.lock +72 -0
  163. data/smoke-test/dependabot-file/over-config/.github/TARGET.yaml +17 -0
  164. data/smoke-test/dependabot-file/over-config/.github/dependabot.yaml +12 -0
  165. data/smoke-test/dependabot-file/over-config/bundler/Gemfile +9 -0
  166. data/smoke-test/dependabot-file/over-config/bundler/Gemfile.lock +47 -0
  167. data/smoke-test/dependabot-file/over-config/bundler/dependabot-all-updates-test-staging.gemspec +10 -0
  168. data/smoke-test/dependabot-file/over-config/cargo/Cargo.lock +90 -0
  169. data/smoke-test/dependabot-file/over-config/cargo/Cargo.toml +8 -0
  170. data/smoke-test/dependabot-file/over-config/composer/composer.json +5 -0
  171. data/smoke-test/dependabot-file/over-config/composer/composer.lock +72 -0
  172. data/smoke-test/dependabot-file/overer-config/.github/TARGET.yaml +19 -0
  173. data/smoke-test/dependabot-file/overer-config/.github/dependabot.yaml +12 -0
  174. data/smoke-test/dependabot-file/overer-config/bundler/Gemfile +9 -0
  175. data/smoke-test/dependabot-file/overer-config/bundler/Gemfile.lock +47 -0
  176. data/smoke-test/dependabot-file/overer-config/bundler/dependabot-all-updates-test-staging.gemspec +10 -0
  177. data/smoke-test/dependabot-file/overer-config/cargo/Cargo.lock +90 -0
  178. data/smoke-test/dependabot-file/overer-config/cargo/Cargo.toml +8 -0
  179. data/smoke-test/dependabot-file/overer-config/composer/composer.json +5 -0
  180. data/smoke-test/dependabot-file/overer-config/composer/composer.lock +72 -0
  181. data/smoke-test/dependabot-file/partial-config/.github/TARGET.yaml +17 -0
  182. data/smoke-test/dependabot-file/partial-config/.github/dependabot.yaml +7 -0
  183. data/smoke-test/dependabot-file/partial-config/bundler/Gemfile +9 -0
  184. data/smoke-test/dependabot-file/partial-config/bundler/Gemfile.lock +47 -0
  185. data/smoke-test/dependabot-file/partial-config/bundler/dependabot-all-updates-test-staging.gemspec +10 -0
  186. data/smoke-test/dependabot-file/partial-config/cargo/Cargo.lock +90 -0
  187. data/smoke-test/dependabot-file/partial-config/cargo/Cargo.toml +8 -0
  188. data/smoke-test/dependabot-file/partial-config/composer/composer.json +5 -0
  189. data/smoke-test/dependabot-file/partial-config/composer/composer.lock +72 -0
  190. data/smoke-test/docker/Dockerfile +19 -0
  191. data/smoke-test/elm/elm-package.json +15 -0
  192. data/smoke-test/elm/elm.json +31 -0
  193. data/smoke-test/github-actions/both/.github/workflows/this.yaml +11 -0
  194. data/smoke-test/github-actions/both/yaml/action.yml +13 -0
  195. data/smoke-test/github-actions/invalid/invalid_file.yaml +0 -0
  196. data/smoke-test/github-actions/workflow/.github/workflows/this.yaml +11 -0
  197. data/smoke-test/github-actions/yaml/action.yml +13 -0
  198. data/smoke-test/gitsubmodule/.gitmodules +4 -0
  199. data/smoke-test/gomod/go.mod +16 -0
  200. data/smoke-test/gomod/go.sum +18 -0
  201. data/smoke-test/gomod/gomain.go +14 -0
  202. data/smoke-test/gradle/.gitignore +1 -0
  203. data/smoke-test/gradle/build.gradle +65 -0
  204. data/smoke-test/maven/pom.xml +65 -0
  205. data/smoke-test/mix/mix.exs +24 -0
  206. data/smoke-test/mix/mix.lock +5 -0
  207. data/smoke-test/npm/package-lock.json +166 -0
  208. data/smoke-test/npm/package.json +22 -0
  209. data/smoke-test/npm/removed/package-lock.json +44 -0
  210. data/smoke-test/npm/removed/package.json +15 -0
  211. data/smoke-test/nuget/project.csproj +14 -0
  212. data/smoke-test/pip/pip/requirements.txt +5 -0
  213. data/smoke-test/pip/pip-compile/requirements.in +2 -0
  214. data/smoke-test/pip/pip-compile/requirements.txt +16 -0
  215. data/smoke-test/pip/pipenv/Pipfile +13 -0
  216. data/smoke-test/pip/pipenv/Pipfile.lock +86 -0
  217. data/smoke-test/pip/poetry/poetry.lock +33 -0
  218. data/smoke-test/pip/poetry/pyproject.toml +18 -0
  219. data/smoke-test/pub/pubspec.lock +40 -0
  220. data/smoke-test/pub/pubspec.yaml +10 -0
  221. data/smoke-test/terraform/main.tf +143 -0
  222. metadata +314 -6
  223. data/LICENSE.GPL-3.0-only +0 -674
@@ -0,0 +1,489 @@
1
+ <?php
2
+
3
+ /*************************************************************************
4
+ * *
5
+ * class.html2text.inc *
6
+ * *
7
+ *************************************************************************
8
+ * *
9
+ * Converts HTML to formatted plain text *
10
+ * *
11
+ * Copyright (c) 2005-2007 Jon Abernathy <jon@chuggnutt.com> *
12
+ * All rights reserved. *
13
+ * *
14
+ * This script is free software; you can redistribute it and/or modify *
15
+ * it under the terms of the GNU General Public License as published by *
16
+ * the Free Software Foundation; either version 2 of the License, or *
17
+ * (at your option) any later version. *
18
+ * *
19
+ * The GNU General Public License can be found at *
20
+ * http://www.gnu.org/copyleft/gpl.html. *
21
+ * *
22
+ * This script is distributed in the hope that it will be useful, *
23
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
24
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
25
+ * GNU General Public License for more details. *
26
+ * *
27
+ * Author(s): Jon Abernathy <jon@chuggnutt.com> *
28
+ * *
29
+ * Last modified: 08/08/07 *
30
+ * *
31
+ *************************************************************************/
32
+
33
+
34
+ /**
35
+ * Takes HTML and converts it to formatted, plain text.
36
+ *
37
+ * Thanks to Alexander Krug (http://www.krugar.de/) for pointing out and
38
+ * correcting an error in the regexp search array. Fixed 7/30/03.
39
+ *
40
+ * Updated set_html() function's file reading mechanism, 9/25/03.
41
+ *
42
+ * Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding
43
+ * several more HTML entity codes to the $search and $replace arrays.
44
+ * Updated 11/7/03.
45
+ *
46
+ * Thanks to Darius Kasperavicius (http://www.dar.dar.lt/) for
47
+ * suggesting the addition of $allowed_tags and its supporting function
48
+ * (which I slightly modified). Updated 3/12/04.
49
+ *
50
+ * Thanks to Justin Dearing for pointing out that a replacement for the
51
+ * <TH> tag was missing, and suggesting an appropriate fix.
52
+ * Updated 8/25/04.
53
+ *
54
+ * Thanks to Mathieu Collas (http://www.myefarm.com/) for finding a
55
+ * display/formatting bug in the _build_link_list() function: email
56
+ * readers would show the left bracket and number ("[1") as part of the
57
+ * rendered email address.
58
+ * Updated 12/16/04.
59
+ *
60
+ * Thanks to Wojciech Bajon (http://histeria.pl/) for submitting code
61
+ * to handle relative links, which I hadn't considered. I modified his
62
+ * code a bit to handle normal HTTP links and MAILTO links. Also for
63
+ * suggesting three additional HTML entity codes to search for.
64
+ * Updated 03/02/05.
65
+ *
66
+ * Thanks to Jacob Chandler for pointing out another link condition
67
+ * for the _build_link_list() function: "https".
68
+ * Updated 04/06/05.
69
+ *
70
+ * Thanks to Marc Bertrand (http://www.dresdensky.com/) for
71
+ * suggesting a revision to the word wrapping functionality; if you
72
+ * specify a $width of 0 or less, word wrapping will be ignored.
73
+ * Updated 11/02/06.
74
+ *
75
+ * *** Big housecleaning updates below:
76
+ *
77
+ * Thanks to Colin Brown (http://www.sparkdriver.co.uk/) for
78
+ * suggesting the fix to handle </li> and blank lines (whitespace).
79
+ * Christian Basedau (http://www.movetheweb.de/) also suggested the
80
+ * blank lines fix.
81
+ *
82
+ * Special thanks to Marcus Bointon (http://www.synchromedia.co.uk/),
83
+ * Christian Basedau, Norbert Laposa (http://ln5.co.uk/),
84
+ * Bas van de Weijer, and Marijn van Butselaar
85
+ * for pointing out my glaring error in the <th> handling. Marcus also
86
+ * supplied a host of fixes.
87
+ *
88
+ * Thanks to Jeffrey Silverman (http://www.newtnotes.com/) for pointing
89
+ * out that extra spaces should be compressed--a problem addressed with
90
+ * Marcus Bointon's fixes but that I had not yet incorporated.
91
+ *
92
+ * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
93
+ * suggesting a valuable fix with <a> tag handling.
94
+ *
95
+ * Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
96
+ * including the <a> tag handling that Daniel Schledermann pointed
97
+ * out but that I had not yet incorporated. I haven't (yet)
98
+ * incorporated all of Wojciech's changes, though I may at some
99
+ * future time.
100
+ *
101
+ * *** End of the housecleaning updates. Updated 08/08/07.
102
+ *
103
+ * @author Jon Abernathy <jon@chuggnutt.com>
104
+ * @version 1.0.0
105
+ * @since PHP 4.0.2
106
+ */
107
class html2text
{

    /**
     * Contains the HTML content to convert.
     *
     * @var string $html
     * @access public
     */
    public $html;

    /**
     * Contains the converted, formatted text.
     *
     * @var string $text
     * @access public
     */
    public $text;

    /**
     * Maximum width of the formatted text, in columns.
     *
     * Set this value to 0 (or less) to ignore word wrapping
     * and not constrain text to a fixed-width column.
     *
     * @var integer $width
     * @access public
     */
    public $width = 70;

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $replace.
     *
     * Patterns are applied one after another, in array order, each one
     * operating on the output of the previous one.
     *
     * None of the patterns uses the PCRE "e" (eval) modifier: it executed
     * document content as PHP code and no longer exists in PHP 7+.
     * Replacements that need PHP logic are expressed as callbacks in
     * $replace instead.
     *
     * @var array $search
     * @access public
     * @see $replace
     */
    public $search = array(
        "/\r/",                                  // Non-legal carriage return
        "/[\n\t]+/",                             // Newlines and tabs
        '/[ ]{2,}/',                             // Runs of spaces, pre-handling
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        //'/<!-- .* -->/',                       // Comments -- which strip_tags might have a problem with
        '/<h[123][^>]*>(.*?)<\/h[123]>/i',       // H1 - H3
        '/<h[456][^>]*>(.*?)<\/h[456]>/i',       // H4 - H6
        '/<p[^>]*>/i',                           // <P>
        '/<br[^>]*>/i',                          // <br>
        '/<b[^>]*>(.*?)<\/b>/i',                 // <b>
        '/<strong[^>]*>(.*?)<\/strong>/i',       // <strong>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
        '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
        '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
        '/<li[^>]*>/i',                          // <li>
        '/<a [^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/i',
                                                 // <a href="">
        '/<hr[^>]*>/i',                          // <hr>
        '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
        '/<th[^>]*>(.*?)<\/th>/i',               // <th> and </th>
        '/&(nbsp|#160);/i',                      // Non-breaking space
        '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
                                                 // Double quotes
        '/&(apos|rsquo|lsquo|#8216|#8217);/i',   // Single quotes
        '/&gt;/i',                               // Greater-than
        '/&lt;/i',                               // Less-than
        '/&(amp|#38);/i',                        // Ampersand
        '/&(copy|#169);/i',                      // Copyright
        '/&(trade|#8482|#153);/i',               // Trademark
        '/&(reg|#174);/i',                       // Registered
        '/&(mdash|#151|#8212);/i',               // mdash
        '/&(ndash|minus|#8211|#8722);/i',        // ndash
        '/&(bull|#149|#8226);/i',                // Bullet
        '/&(pound|#163);/i',                     // Pound sign
        '/&(euro|#8364);/i',                     // Euro sign
        '/&[^&;]+;/i',                           // Unknown/unhandled entities
        '/[ ]{2,}/'                              // Runs of spaces, post-handling
    );

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * Each entry is either:
     *  - a string, used as a regular preg_replace() replacement, or
     *  - a one-element array holding the name of a method of this class;
     *    that method is invoked through preg_replace_callback() with the
     *    match array and must return the replacement text.
     *
     * @var array $replace
     * @access public
     * @see $search
     */
    public $replace = array(
        '',                                      // Non-legal carriage return
        ' ',                                     // Newlines and tabs
        ' ',                                     // Runs of spaces, pre-handling
        '',                                      // <script>s -- which strip_tags supposedly has problems with
        '',                                      // <style>s -- which strip_tags supposedly has problems with
        //'',                                    // Comments -- which strip_tags might have a problem with
        array('_replace_major_heading'),         // H1 - H3
        array('_replace_minor_heading'),         // H4 - H6
        "\n\n\t",                                // <P>
        "\n",                                    // <br>
        array('_replace_bold'),                  // <b>
        array('_replace_bold'),                  // <strong>
        '_\\1_',                                 // <i>
        '_\\1_',                                 // <em>
        "\n\n",                                  // <ul> and </ul>
        "\n\n",                                  // <ol> and </ol>
        "\t* \\1\n",                             // <li> and </li>
        "\n\t* ",                                // <li>
        array('_replace_link'),                  // <a href="">
        "\n-------------------------\n",         // <hr>
        "\n\n",                                  // <table> and </table>
        "\n",                                    // <tr> and </tr>
        "\t\t\\1\n",                             // <td> and </td>
        array('_replace_table_header'),          // <th> and </th>
        ' ',                                     // Non-breaking space
        '"',                                     // Double quotes
        "'",                                     // Single quotes
        '>',                                     // Greater-than
        '<',                                     // Less-than
        '&',                                     // Ampersand
        '(c)',                                   // Copyright
        '(tm)',                                  // Trademark
        '(R)',                                   // Registered
        '--',                                    // mdash
        '-',                                     // ndash
        '*',                                     // Bullet
        '£',                                     // Pound sign (emitted as UTF-8)
        'EUR',                                   // Euro sign. € ?
        '',                                      // Unknown/unhandled entities
        ' '                                      // Runs of spaces, post-handling
    );

    /**
     * Contains a list of HTML tags to allow in the resulting text.
     *
     * @var string $allowed_tags
     * @access public
     * @see set_allowed_tags()
     */
    public $allowed_tags = '';

    /**
     * Contains the base URL that relative links should resolve to.
     *
     * @var string $url
     * @access public
     */
    public $url;

    /**
     * Indicates whether content in the $html variable has been converted yet.
     *
     * @var boolean $_converted
     * @access private
     * @see $html, $text
     */
    public $_converted = false;

    /**
     * Contains URL addresses from links to be rendered in plain text.
     *
     * @var string $_link_list
     * @access private
     * @see _build_link_list()
     */
    public $_link_list = '';

    /**
     * Number of valid links detected in the text, used for plain text
     * display (rendered similar to footnotes).
     *
     * @var integer $_link_count
     * @access private
     * @see _build_link_list()
     */
    public $_link_count = 0;

    /**
     * Constructor.
     *
     * If the HTML source string (or file) is supplied, the class
     * will instantiate with that source propagated; all that has
     * to be done is to call get_text().
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     */
    public function __construct( $source = '', $from_file = false )
    {
        // Compare against '' rather than using empty(), so that a document
        // consisting of the single character "0" is not silently dropped.
        if ( (string) $source !== '' ) {
            $this->set_html($source, $from_file);
        }
        $this->set_base_url();
    }

    /**
     * Legacy (PHP 4 style) constructor name.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real work lives in __construct(). This method is
     * kept so that code calling html2text() explicitly keeps working.
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     * @see __construct()
     */
    public function html2text( $source = '', $from_file = false )
    {
        $this->__construct($source, $from_file);
    }

    /**
     * Loads source HTML into memory, either from $source string or a file.
     *
     * When $from_file is set but $source is not a readable regular file
     * (or reading it fails), $source itself is kept as the HTML content.
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     */
    public function set_html( $source, $from_file = false )
    {
        $this->html = $source;

        if ( $from_file && is_file($source) && is_readable($source) ) {
            // file_get_contents() copes with empty files, whereas
            // fread($fp, 0) raises an error on PHP 8.
            $contents = file_get_contents($source);
            if ( $contents !== false ) {
                $this->html = $contents;
            }
        }

        $this->_converted = false;
    }

    /**
     * Returns the text, converted from HTML.
     *
     * The conversion is performed lazily and only once per loaded document.
     *
     * @access public
     * @return string
     */
    public function get_text()
    {
        if ( !$this->_converted ) {
            $this->_convert();
        }

        return $this->text;
    }

    /**
     * Prints the text, converted from HTML.
     *
     * @access public
     * @return void
     */
    public function print_text()
    {
        print $this->get_text();
    }

    /**
     * Alias to print_text(), operates identically.
     *
     * @access public
     * @return void
     * @see print_text()
     */
    public function p()
    {
        $this->print_text();
    }

    /**
     * Sets the allowed HTML tags to pass through to the resulting text.
     *
     * Tags should be in the form "<p>", with no corresponding closing tag.
     * An empty argument leaves the current setting untouched.
     *
     * @param string $allowed_tags Tags to keep, e.g. "<p><br>"
     * @access public
     * @return void
     */
    public function set_allowed_tags( $allowed_tags = '' )
    {
        if ( !empty($allowed_tags) ) {
            $this->allowed_tags = $allowed_tags;
        }
    }

    /**
     * Sets a base URL to handle relative links.
     *
     * With no argument, the base URL is derived from $_SERVER['HTTP_HOST']
     * when that is available, and is left empty otherwise.
     *
     * @param string $url Base URL, with or without trailing slashes
     * @access public
     * @return void
     */
    public function set_base_url( $url = '' )
    {
        if ( empty($url) ) {
            if ( !empty($_SERVER['HTTP_HOST']) ) {
                $this->url = 'http://' . $_SERVER['HTTP_HOST'];
            } else {
                $this->url = '';
            }
        } else {
            // Strip any trailing slashes for consistency (relative
            // URLs may already start with a slash like "/file.html")
            $this->url = rtrim($url, '/');
        }
    }

    /**
     * Workhorse function that does actual conversion.
     *
     * First performs custom tag replacement specified by $search and
     * $replace arrays. Then strips any remaining HTML tags, reduces whitespace
     * and newlines to a readable format, and word wraps the text to
     * $width characters.
     *
     * @access private
     * @return void
     */
    public function _convert()
    {
        // Variables used for building the link list
        $this->_link_count = 0;
        $this->_link_list = '';

        // NOTE(review): stripslashes() is a leftover from the magic-quotes
        // era and also removes legitimate backslashes from the document; it
        // is kept here only to preserve the established output.
        $text = trim(stripslashes((string) $this->html));

        // Run our defined search-and-replace
        $text = $this->_apply_replacements($text);

        // Strip any other HTML tags
        $text = strip_tags($text, $this->allowed_tags);

        // Bring down number of empty lines to 2 max
        $text = preg_replace("/\n\s+\n/", "\n\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        // Add link list
        if ( !empty($this->_link_list) ) {
            $text .= "\n\nLinks:\n------\n" . $this->_link_list;
        }

        // Wrap the text to a readable format. Default width is 70.
        // If width is 0 or less, don't wrap the text.
        if ( $this->width > 0 ) {
            $text = wordwrap($text, $this->width);
        }

        $this->text = $text;

        $this->_converted = true;
    }

    /**
     * Applies every $search pattern with its $replace counterpart, in order.
     *
     * String replacements go through preg_replace(); array entries name a
     * method of this class and go through preg_replace_callback(). A
     * pattern with no counterpart is replaced by the empty string, matching
     * preg_replace()'s own behaviour for uneven arrays.
     *
     * @param string $text Text to transform
     * @access private
     * @return string
     */
    protected function _apply_replacements( $text )
    {
        $replacements = array_values($this->replace);

        foreach ( array_values($this->search) as $index => $pattern ) {
            $replacement = isset($replacements[$index]) ? $replacements[$index] : '';

            if ( is_array($replacement) ) {
                $callback = array($this, (string) reset($replacement));
                if ( !is_callable($callback) ) {
                    continue;
                }
                $result = preg_replace_callback($pattern, $callback, $text);
            } else {
                $result = preg_replace($pattern, $replacement, $text);
            }

            // preg_* return NULL on a PCRE failure; keep the text gathered
            // so far instead of discarding the whole document.
            if ( $result !== null ) {
                $text = $result;
            }
        }

        return $text;
    }

    /**
     * Replacement callback for <h1> - <h3>: upper-cased, set off by blank lines.
     *
     * @param array $matches preg match array; index 1 holds the heading text
     * @access private
     * @return string
     */
    protected function _replace_major_heading( $matches )
    {
        return strtoupper("\n\n" . $matches[1] . "\n\n");
    }

    /**
     * Replacement callback for <h4> - <h6>: capitalised words, set off by blank lines.
     *
     * @param array $matches preg match array; index 1 holds the heading text
     * @access private
     * @return string
     */
    protected function _replace_minor_heading( $matches )
    {
        return ucwords("\n\n" . $matches[1] . "\n\n");
    }

    /**
     * Replacement callback for <b> and <strong>: upper-cased content.
     *
     * @param array $matches preg match array; index 1 holds the tag content
     * @access private
     * @return string
     */
    protected function _replace_bold( $matches )
    {
        return strtoupper($matches[1]);
    }

    /**
     * Replacement callback for <a href="">: delegates to _build_link_list().
     *
     * @param array $matches preg match array; index 1 is the href, index 2 the link text
     * @access private
     * @return string
     * @see _build_link_list()
     */
    protected function _replace_link( $matches )
    {
        return $this->_build_link_list($matches[1], $matches[2]);
    }

    /**
     * Replacement callback for <th>: upper-cased, indented like a <td> cell.
     *
     * @param array $matches preg match array; index 1 holds the cell content
     * @access private
     * @return string
     */
    protected function _replace_table_header( $matches )
    {
        return strtoupper("\t\t" . $matches[1] . "\n");
    }

    /**
     * Helper function called on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices to the original point in the text they
     * appeared. Also makes an effort at identifying and handling absolute
     * and relative links. URL schemes are matched case-insensitively.
     *
     * @param string $link URL of the link
     * @param string $display Part of the text to associate number with
     * @access private
     * @return string
     */
    public function _build_link_list( $link, $display )
    {
        if ( preg_match('/^(https?:\/\/|mailto:)/i', $link) ) {
            // Absolute link: list it verbatim
            $entry = $link;
        } elseif ( strncasecmp($link, 'javascript:', 11) === 0 ) {
            // Don't count the link; ignore it
            // what about href="#anchor" ?
            return $display;
        } else {
            // Relative link: resolve against the base URL, inserting the
            // separating slash when the link does not bring its own
            $entry = $this->url . (substr($link, 0, 1) != '/' ? '/' : '') . $link;
        }

        $this->_link_count++;
        $this->_link_list .= '[' . $this->_link_count . '] ' . $entry . "\n";

        return $display . ' [' . $this->_link_count . ']';
    }

}
488
+
489
+ ?>