com.googler.python 1.0.7 → 1.0.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/package.json +4 -2
  2. package/python3.4.2/lib/python3.4/site-packages/pip/__init__.py +1 -277
  3. package/python3.4.2/lib/python3.4/site-packages/pip/__main__.py +19 -7
  4. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/__init__.py +246 -0
  5. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/basecommand.py +373 -0
  6. package/python3.4.2/lib/python3.4/site-packages/pip/{baseparser.py → _internal/baseparser.py} +240 -224
  7. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/build_env.py +92 -0
  8. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/cache.py +202 -0
  9. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/cmdoptions.py +609 -0
  10. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/__init__.py +79 -0
  11. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/check.py +42 -0
  12. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/completion.py +94 -0
  13. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/configuration.py +227 -0
  14. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/download.py +233 -0
  15. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/freeze.py +96 -0
  16. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/hash.py +57 -0
  17. package/python3.4.2/lib/python3.4/site-packages/pip/{commands → _internal/commands}/help.py +36 -33
  18. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/install.py +477 -0
  19. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/list.py +343 -0
  20. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/search.py +135 -0
  21. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/show.py +164 -0
  22. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/uninstall.py +71 -0
  23. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/commands/wheel.py +179 -0
  24. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/compat.py +235 -0
  25. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/configuration.py +378 -0
  26. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/download.py +922 -0
  27. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/exceptions.py +249 -0
  28. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/index.py +1117 -0
  29. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/locations.py +194 -0
  30. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/models/__init__.py +4 -0
  31. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/models/index.py +15 -0
  32. package/python3.4.2/lib/python3.4/site-packages/pip/{_vendor/requests/packages/urllib3/contrib → _internal/operations}/__init__.py +0 -0
  33. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/operations/check.py +106 -0
  34. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/operations/freeze.py +252 -0
  35. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/operations/prepare.py +378 -0
  36. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/pep425tags.py +317 -0
  37. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/req/__init__.py +69 -0
  38. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/req/req_file.py +338 -0
  39. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/req/req_install.py +1115 -0
  40. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/req/req_set.py +164 -0
  41. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/req/req_uninstall.py +455 -0
  42. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/resolve.py +354 -0
  43. package/python3.4.2/lib/python3.4/site-packages/pip/{status_codes.py → _internal/status_codes.py} +8 -6
  44. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/__init__.py +0 -0
  45. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/appdirs.py +258 -0
  46. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/deprecation.py +77 -0
  47. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/encoding.py +33 -0
  48. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/filesystem.py +28 -0
  49. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/glibc.py +84 -0
  50. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/hashes.py +94 -0
  51. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/logging.py +132 -0
  52. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/misc.py +851 -0
  53. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/outdated.py +163 -0
  54. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/packaging.py +70 -0
  55. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/setuptools_build.py +8 -0
  56. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/temp_dir.py +82 -0
  57. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/typing.py +29 -0
  58. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/utils/ui.py +421 -0
  59. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/vcs/__init__.py +471 -0
  60. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/vcs/bazaar.py +113 -0
  61. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/vcs/git.py +311 -0
  62. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/vcs/mercurial.py +105 -0
  63. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/vcs/subversion.py +271 -0
  64. package/python3.4.2/lib/python3.4/site-packages/pip/_internal/wheel.py +817 -0
  65. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/__init__.py +109 -8
  66. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/appdirs.py +604 -0
  67. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/__init__.py +11 -0
  68. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/_cmd.py +60 -0
  69. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/adapter.py +134 -0
  70. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/cache.py +39 -0
  71. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/caches/__init__.py +2 -0
  72. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/caches/file_cache.py +133 -0
  73. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/caches/redis_cache.py +43 -0
  74. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/compat.py +29 -0
  75. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/controller.py +373 -0
  76. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/filewrapper.py +78 -0
  77. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/heuristics.py +138 -0
  78. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/serialize.py +194 -0
  79. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/cachecontrol/wrapper.py +27 -0
  80. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/certifi/__init__.py +3 -0
  81. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/certifi/__main__.py +2 -0
  82. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests → certifi}/cacert.pem +1765 -2358
  83. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/certifi/core.py +37 -0
  84. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/__init__.py +39 -32
  85. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/big5freq.py +386 -0
  86. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/big5prober.py +47 -42
  87. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/chardistribution.py +233 -231
  88. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/charsetgroupprober.py +106 -0
  89. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/charsetprober.py +145 -0
  90. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/cli/__init__.py +1 -0
  91. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/cli/chardetect.py +85 -0
  92. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/codingstatemachine.py +88 -0
  93. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/compat.py +34 -34
  94. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/cp949prober.py +49 -44
  95. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/enums.py +76 -0
  96. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/escprober.py +101 -0
  97. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/escsm.py +246 -0
  98. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/eucjpprober.py +92 -0
  99. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/euckrfreq.py +195 -0
  100. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/euckrprober.py +47 -42
  101. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/euctwfreq.py +387 -428
  102. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/euctwprober.py +46 -41
  103. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/gb2312freq.py +283 -472
  104. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/gb2312prober.py +46 -41
  105. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/hebrewprober.py +292 -283
  106. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/jisfreq.py +325 -569
  107. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/jpcntx.py +233 -219
  108. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langbulgarianmodel.py +228 -229
  109. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langcyrillicmodel.py +333 -329
  110. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langgreekmodel.py +225 -225
  111. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langhebrewmodel.py +200 -201
  112. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langhungarianmodel.py +225 -225
  113. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/langthaimodel.py +199 -200
  114. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/langturkishmodel.py +193 -0
  115. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/latin1prober.py +145 -139
  116. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/mbcharsetprober.py +91 -0
  117. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/mbcsgroupprober.py +54 -54
  118. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/mbcssm.py +572 -0
  119. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/sbcharsetprober.py +132 -0
  120. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/sbcsgroupprober.py +73 -69
  121. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/sjisprober.py +92 -0
  122. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/universaldetector.py +286 -0
  123. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/chardet → chardet}/utf8prober.py +82 -76
  124. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/chardet/version.py +9 -0
  125. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/__init__.py +7 -7
  126. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/ansi.py +102 -50
  127. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/ansitowin32.py +236 -190
  128. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/initialise.py +82 -56
  129. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/win32.py +156 -137
  130. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/colorama/winterm.py +162 -120
  131. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/__init__.py +23 -23
  132. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/__init__.py +6 -6
  133. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/misc.py +41 -41
  134. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/shutil.py +761 -761
  135. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/sysconfig.cfg +84 -84
  136. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/sysconfig.py +788 -788
  137. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/_backport/tarfile.py +2607 -2607
  138. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/compat.py +1117 -1064
  139. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/database.py +1318 -1301
  140. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/index.py +516 -488
  141. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/locators.py +1292 -1194
  142. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/manifest.py +393 -364
  143. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/markers.py +131 -190
  144. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/metadata.py +1068 -1026
  145. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/resources.py +355 -317
  146. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/scripts.py +415 -323
  147. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/t32.exe +0 -0
  148. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/t64.exe +0 -0
  149. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/util.py +1755 -1575
  150. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/version.py +736 -721
  151. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/w32.exe +0 -0
  152. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/w64.exe +0 -0
  153. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distlib/wheel.py +984 -958
  154. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/distro.py +1104 -0
  155. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/__init__.py +35 -23
  156. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{ihatexml.py → _ihatexml.py} +288 -285
  157. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{inputstream.py → _inputstream.py} +923 -881
  158. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{tokenizer.py → _tokenizer.py} +1721 -1731
  159. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{trie → _trie}/__init__.py +14 -12
  160. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{trie → _trie}/_base.py +37 -37
  161. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{trie → _trie}/datrie.py +44 -44
  162. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{trie → _trie}/py.py +67 -67
  163. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/{utils.py → _utils.py} +124 -82
  164. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/constants.py +2947 -3104
  165. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py +29 -20
  166. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/{_base.py → base.py} +12 -12
  167. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py +73 -65
  168. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/lint.py +93 -93
  169. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/optionaltags.py +207 -205
  170. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/sanitizer.py +896 -12
  171. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/filters/whitespace.py +38 -38
  172. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/html5parser.py +2791 -2713
  173. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/serializer.py +409 -0
  174. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treeadapters/__init__.py +30 -0
  175. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treeadapters/genshi.py +54 -0
  176. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treeadapters/sax.py +50 -44
  177. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py +88 -76
  178. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treebuilders/{_base.py → base.py} +417 -377
  179. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treebuilders/dom.py +236 -227
  180. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treebuilders/etree.py +340 -337
  181. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py +366 -369
  182. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py +154 -57
  183. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/{_base.py → base.py} +252 -200
  184. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/dom.py +43 -46
  185. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/etree.py +130 -138
  186. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/{lxmletree.py → etree_lxml.py} +213 -208
  187. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/{genshistream.py → genshi.py} +69 -69
  188. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/__init__.py +2 -0
  189. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/codec.py +118 -0
  190. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/compat.py +12 -0
  191. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/core.py +387 -0
  192. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/idnadata.py +1585 -0
  193. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/intranges.py +53 -0
  194. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/package_data.py +2 -0
  195. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/idna/uts46data.py +7634 -0
  196. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/ipaddress.py +2419 -0
  197. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/__init__.py +347 -0
  198. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/linklockfile.py +73 -0
  199. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/mkdirlockfile.py +84 -0
  200. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/pidlockfile.py +190 -0
  201. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/sqlitelockfile.py +156 -0
  202. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/lockfile/symlinklockfile.py +70 -0
  203. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/msgpack/__init__.py +66 -0
  204. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/msgpack/_version.py +1 -0
  205. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/msgpack/exceptions.py +41 -0
  206. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/msgpack/fallback.py +971 -0
  207. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/__about__.py +21 -0
  208. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/__init__.py +14 -0
  209. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/_compat.py +30 -0
  210. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/_structures.py +70 -0
  211. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/markers.py +301 -0
  212. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/requirements.py +130 -0
  213. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/specifiers.py +774 -0
  214. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/utils.py +63 -0
  215. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/packaging/version.py +441 -0
  216. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{pkg_resources.py → pkg_resources/__init__.py} +3125 -2762
  217. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pkg_resources/py31compat.py +22 -0
  218. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/progress/__init__.py +127 -0
  219. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/progress/bar.py +88 -0
  220. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/progress/counter.py +48 -0
  221. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/progress/helpers.py +91 -0
  222. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/progress/spinner.py +44 -0
  223. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pyparsing.py +5720 -0
  224. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pytoml/__init__.py +3 -0
  225. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pytoml/core.py +13 -0
  226. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pytoml/parser.py +374 -0
  227. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/pytoml/writer.py +127 -0
  228. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/__init__.py +123 -77
  229. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/__version__.py +14 -0
  230. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/_internal_utils.py +42 -0
  231. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/adapters.py +525 -388
  232. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/api.py +152 -120
  233. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/auth.py +293 -193
  234. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/certs.py +18 -24
  235. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/compat.py +73 -115
  236. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/cookies.py +542 -454
  237. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/exceptions.py +122 -75
  238. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/help.py +120 -0
  239. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/hooks.py +34 -45
  240. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/models.py +948 -803
  241. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages.py +16 -0
  242. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/sessions.py +737 -637
  243. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/status_codes.py +91 -88
  244. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/structures.py +105 -127
  245. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/utils.py +904 -673
  246. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/retrying.py +267 -0
  247. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/six.py +891 -646
  248. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/__init__.py +97 -0
  249. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/_collections.py +319 -0
  250. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/connection.py +373 -0
  251. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/connectionpool.py +905 -710
  252. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/__init__.py +0 -0
  253. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__init__.py +0 -0
  254. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py +593 -0
  255. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py +343 -0
  256. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/appengine.py +296 -0
  257. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/contrib/ntlmpool.py +112 -120
  258. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/pyopenssl.py +455 -0
  259. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/securetransport.py +810 -0
  260. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/contrib/socks.py +188 -0
  261. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/exceptions.py +246 -0
  262. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/fields.py +178 -177
  263. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/filepost.py +94 -100
  264. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/packages/__init__.py +5 -4
  265. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/packages/backports/__init__.py +0 -0
  266. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/packages/backports/makefile.py +53 -0
  267. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/packages/ordered_dict.py +259 -260
  268. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/packages/six.py +868 -0
  269. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/packages/ssl_match_hostname/__init__.py +19 -13
  270. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/packages/ssl_match_hostname/_implementation.py +157 -105
  271. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/poolmanager.py +440 -0
  272. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/request.py +148 -141
  273. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/response.py +626 -0
  274. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/__init__.py +54 -0
  275. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/connection.py +130 -0
  276. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/request.py +118 -0
  277. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/response.py +81 -0
  278. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/retry.py +401 -0
  279. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/selectors.py +581 -0
  280. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/ssl_.py +341 -0
  281. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/util/timeout.py +242 -234
  282. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/{requests/packages/urllib3 → urllib3}/util/url.py +230 -162
  283. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/urllib3/util/wait.py +40 -0
  284. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/webencodings/__init__.py +342 -0
  285. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/webencodings/labels.py +231 -0
  286. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/webencodings/mklabels.py +59 -0
  287. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/webencodings/tests.py +153 -0
  288. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/webencodings/x_user_defined.py +325 -0
  289. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/_markerlib/__init__.py +0 -16
  290. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/_markerlib/markers.py +0 -119
  291. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/sanitizer.py +0 -271
  292. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/serializer/__init__.py +0 -16
  293. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py +0 -320
  294. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py +0 -63
  295. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/re-vendor.py +0 -34
  296. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/__init__.py +0 -3
  297. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/big5freq.py +0 -925
  298. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/chardetect.py +0 -46
  299. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/charsetgroupprober.py +0 -106
  300. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/charsetprober.py +0 -62
  301. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/codingstatemachine.py +0 -61
  302. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/constants.py +0 -39
  303. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/escprober.py +0 -86
  304. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/escsm.py +0 -242
  305. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/eucjpprober.py +0 -90
  306. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/euckrfreq.py +0 -596
  307. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/mbcharsetprober.py +0 -86
  308. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/mbcssm.py +0 -575
  309. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/sbcharsetprober.py +0 -120
  310. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/sjisprober.py +0 -91
  311. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/chardet/universaldetector.py +0 -170
  312. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/__init__.py +0 -58
  313. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/_collections.py +0 -205
  314. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/connection.py +0 -204
  315. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/contrib/pyopenssl.py +0 -422
  316. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/exceptions.py +0 -126
  317. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/packages/six.py +0 -385
  318. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/poolmanager.py +0 -258
  319. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/response.py +0 -308
  320. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/util/__init__.py +0 -27
  321. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/util/connection.py +0 -45
  322. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/util/request.py +0 -68
  323. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/util/response.py +0 -13
  324. package/python3.4.2/lib/python3.4/site-packages/pip/_vendor/requests/packages/urllib3/util/ssl_.py +0 -133
  325. package/python3.4.2/lib/python3.4/site-packages/pip/backwardcompat/__init__.py +0 -138
  326. package/python3.4.2/lib/python3.4/site-packages/pip/basecommand.py +0 -201
  327. package/python3.4.2/lib/python3.4/site-packages/pip/cmdoptions.py +0 -371
  328. package/python3.4.2/lib/python3.4/site-packages/pip/commands/__init__.py +0 -88
  329. package/python3.4.2/lib/python3.4/site-packages/pip/commands/bundle.py +0 -42
  330. package/python3.4.2/lib/python3.4/site-packages/pip/commands/completion.py +0 -59
  331. package/python3.4.2/lib/python3.4/site-packages/pip/commands/freeze.py +0 -114
  332. package/python3.4.2/lib/python3.4/site-packages/pip/commands/install.py +0 -314
  333. package/python3.4.2/lib/python3.4/site-packages/pip/commands/list.py +0 -162
  334. package/python3.4.2/lib/python3.4/site-packages/pip/commands/search.py +0 -132
  335. package/python3.4.2/lib/python3.4/site-packages/pip/commands/show.py +0 -80
  336. package/python3.4.2/lib/python3.4/site-packages/pip/commands/uninstall.py +0 -59
  337. package/python3.4.2/lib/python3.4/site-packages/pip/commands/unzip.py +0 -7
  338. package/python3.4.2/lib/python3.4/site-packages/pip/commands/wheel.py +0 -195
  339. package/python3.4.2/lib/python3.4/site-packages/pip/commands/zip.py +0 -351
  340. package/python3.4.2/lib/python3.4/site-packages/pip/download.py +0 -644
  341. package/python3.4.2/lib/python3.4/site-packages/pip/exceptions.py +0 -46
  342. package/python3.4.2/lib/python3.4/site-packages/pip/index.py +0 -990
  343. package/python3.4.2/lib/python3.4/site-packages/pip/locations.py +0 -171
  344. package/python3.4.2/lib/python3.4/site-packages/pip/log.py +0 -276
  345. package/python3.4.2/lib/python3.4/site-packages/pip/pep425tags.py +0 -102
  346. package/python3.4.2/lib/python3.4/site-packages/pip/req.py +0 -1931
  347. package/python3.4.2/lib/python3.4/site-packages/pip/runner.py +0 -18
  348. package/python3.4.2/lib/python3.4/site-packages/pip/util.py +0 -720
  349. package/python3.4.2/lib/python3.4/site-packages/pip/vcs/__init__.py +0 -251
  350. package/python3.4.2/lib/python3.4/site-packages/pip/vcs/bazaar.py +0 -131
  351. package/python3.4.2/lib/python3.4/site-packages/pip/vcs/git.py +0 -194
  352. package/python3.4.2/lib/python3.4/site-packages/pip/vcs/mercurial.py +0 -151
  353. package/python3.4.2/lib/python3.4/site-packages/pip/vcs/subversion.py +0 -273
  354. package/python3.4.2/lib/python3.4/site-packages/pip/wheel.py +0 -560
@@ -1,881 +1,923 @@
1
- from __future__ import absolute_import, division, unicode_literals
2
- from pip._vendor.six import text_type
3
-
4
- import codecs
5
- import re
6
-
7
- from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
8
- from .constants import encodings, ReparseException
9
- from . import utils
10
-
11
- from io import StringIO
12
-
13
- try:
14
- from io import BytesIO
15
- except ImportError:
16
- BytesIO = StringIO
17
-
18
- try:
19
- from io import BufferedIOBase
20
- except ImportError:
21
- class BufferedIOBase(object):
22
- pass
23
-
24
- # Non-unicode versions of constants for use in the pre-parser
25
- spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
26
- asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
27
- asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
28
- spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
29
-
30
- invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
31
-
32
- non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
33
- 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
34
- 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
35
- 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
36
- 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
37
- 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
38
- 0x10FFFE, 0x10FFFF])
39
-
40
- ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
41
-
42
- # Cache for charsUntil()
43
- charsUntilRegEx = {}
44
-
45
-
46
- class BufferedStream(object):
47
- """Buffering for streams that do not have buffering of their own
48
-
49
- The buffer is implemented as a list of chunks on the assumption that
50
- joining many strings will be slow since it is O(n**2)
51
- """
52
-
53
- def __init__(self, stream):
54
- self.stream = stream
55
- self.buffer = []
56
- self.position = [-1, 0] # chunk number, offset
57
-
58
- def tell(self):
59
- pos = 0
60
- for chunk in self.buffer[:self.position[0]]:
61
- pos += len(chunk)
62
- pos += self.position[1]
63
- return pos
64
-
65
- def seek(self, pos):
66
- assert pos <= self._bufferedBytes()
67
- offset = pos
68
- i = 0
69
- while len(self.buffer[i]) < offset:
70
- offset -= len(self.buffer[i])
71
- i += 1
72
- self.position = [i, offset]
73
-
74
- def read(self, bytes):
75
- if not self.buffer:
76
- return self._readStream(bytes)
77
- elif (self.position[0] == len(self.buffer) and
78
- self.position[1] == len(self.buffer[-1])):
79
- return self._readStream(bytes)
80
- else:
81
- return self._readFromBuffer(bytes)
82
-
83
- def _bufferedBytes(self):
84
- return sum([len(item) for item in self.buffer])
85
-
86
- def _readStream(self, bytes):
87
- data = self.stream.read(bytes)
88
- self.buffer.append(data)
89
- self.position[0] += 1
90
- self.position[1] = len(data)
91
- return data
92
-
93
- def _readFromBuffer(self, bytes):
94
- remainingBytes = bytes
95
- rv = []
96
- bufferIndex = self.position[0]
97
- bufferOffset = self.position[1]
98
- while bufferIndex < len(self.buffer) and remainingBytes != 0:
99
- assert remainingBytes > 0
100
- bufferedData = self.buffer[bufferIndex]
101
-
102
- if remainingBytes <= len(bufferedData) - bufferOffset:
103
- bytesToRead = remainingBytes
104
- self.position = [bufferIndex, bufferOffset + bytesToRead]
105
- else:
106
- bytesToRead = len(bufferedData) - bufferOffset
107
- self.position = [bufferIndex, len(bufferedData)]
108
- bufferIndex += 1
109
- rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
110
- remainingBytes -= bytesToRead
111
-
112
- bufferOffset = 0
113
-
114
- if remainingBytes:
115
- rv.append(self._readStream(remainingBytes))
116
-
117
- return b"".join(rv)
118
-
119
-
120
- def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
121
- if hasattr(source, "read"):
122
- isUnicode = isinstance(source.read(0), text_type)
123
- else:
124
- isUnicode = isinstance(source, text_type)
125
-
126
- if isUnicode:
127
- if encoding is not None:
128
- raise TypeError("Cannot explicitly set an encoding with a unicode string")
129
-
130
- return HTMLUnicodeInputStream(source)
131
- else:
132
- return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
133
-
134
-
135
- class HTMLUnicodeInputStream(object):
136
- """Provides a unicode stream of characters to the HTMLTokenizer.
137
-
138
- This class takes care of character encoding and removing or replacing
139
- incorrect byte-sequences and also provides column and line tracking.
140
-
141
- """
142
-
143
- _defaultChunkSize = 10240
144
-
145
- def __init__(self, source):
146
- """Initialises the HTMLInputStream.
147
-
148
- HTMLInputStream(source, [encoding]) -> Normalized stream from source
149
- for use by html5lib.
150
-
151
- source can be either a file-object, local filename or a string.
152
-
153
- The optional encoding parameter must be a string that indicates
154
- the encoding. If specified, that encoding will be used,
155
- regardless of any BOM or later declaration (such as in a meta
156
- element)
157
-
158
- parseMeta - Look for a <meta> element containing encoding information
159
-
160
- """
161
-
162
- # Craziness
163
- if len("\U0010FFFF") == 1:
164
- self.reportCharacterErrors = self.characterErrorsUCS4
165
- self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
166
- else:
167
- self.reportCharacterErrors = self.characterErrorsUCS2
168
- self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
169
-
170
- # List of where new lines occur
171
- self.newLines = [0]
172
-
173
- self.charEncoding = ("utf-8", "certain")
174
- self.dataStream = self.openStream(source)
175
-
176
- self.reset()
177
-
178
- def reset(self):
179
- self.chunk = ""
180
- self.chunkSize = 0
181
- self.chunkOffset = 0
182
- self.errors = []
183
-
184
- # number of (complete) lines in previous chunks
185
- self.prevNumLines = 0
186
- # number of columns in the last line of the previous chunk
187
- self.prevNumCols = 0
188
-
189
- # Deal with CR LF and surrogates split over chunk boundaries
190
- self._bufferedCharacter = None
191
-
192
- def openStream(self, source):
193
- """Produces a file object from source.
194
-
195
- source can be either a file object, local filename or a string.
196
-
197
- """
198
- # Already a file object
199
- if hasattr(source, 'read'):
200
- stream = source
201
- else:
202
- stream = StringIO(source)
203
-
204
- return stream
205
-
206
- def _position(self, offset):
207
- chunk = self.chunk
208
- nLines = chunk.count('\n', 0, offset)
209
- positionLine = self.prevNumLines + nLines
210
- lastLinePos = chunk.rfind('\n', 0, offset)
211
- if lastLinePos == -1:
212
- positionColumn = self.prevNumCols + offset
213
- else:
214
- positionColumn = offset - (lastLinePos + 1)
215
- return (positionLine, positionColumn)
216
-
217
- def position(self):
218
- """Returns (line, col) of the current position in the stream."""
219
- line, col = self._position(self.chunkOffset)
220
- return (line + 1, col)
221
-
222
- def char(self):
223
- """ Read one character from the stream or queue if available. Return
224
- EOF when EOF is reached.
225
- """
226
- # Read a new chunk from the input stream if necessary
227
- if self.chunkOffset >= self.chunkSize:
228
- if not self.readChunk():
229
- return EOF
230
-
231
- chunkOffset = self.chunkOffset
232
- char = self.chunk[chunkOffset]
233
- self.chunkOffset = chunkOffset + 1
234
-
235
- return char
236
-
237
- def readChunk(self, chunkSize=None):
238
- if chunkSize is None:
239
- chunkSize = self._defaultChunkSize
240
-
241
- self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
242
-
243
- self.chunk = ""
244
- self.chunkSize = 0
245
- self.chunkOffset = 0
246
-
247
- data = self.dataStream.read(chunkSize)
248
-
249
- # Deal with CR LF and surrogates broken across chunks
250
- if self._bufferedCharacter:
251
- data = self._bufferedCharacter + data
252
- self._bufferedCharacter = None
253
- elif not data:
254
- # We have no more data, bye-bye stream
255
- return False
256
-
257
- if len(data) > 1:
258
- lastv = ord(data[-1])
259
- if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
260
- self._bufferedCharacter = data[-1]
261
- data = data[:-1]
262
-
263
- self.reportCharacterErrors(data)
264
-
265
- # Replace invalid characters
266
- # Note U+0000 is dealt with in the tokenizer
267
- data = self.replaceCharactersRegexp.sub("\ufffd", data)
268
-
269
- data = data.replace("\r\n", "\n")
270
- data = data.replace("\r", "\n")
271
-
272
- self.chunk = data
273
- self.chunkSize = len(data)
274
-
275
- return True
276
-
277
- def characterErrorsUCS4(self, data):
278
- for i in range(len(invalid_unicode_re.findall(data))):
279
- self.errors.append("invalid-codepoint")
280
-
281
- def characterErrorsUCS2(self, data):
282
- # Someone picked the wrong compile option
283
- # You lose
284
- skip = False
285
- for match in invalid_unicode_re.finditer(data):
286
- if skip:
287
- continue
288
- codepoint = ord(match.group())
289
- pos = match.start()
290
- # Pretty sure there should be endianness issues here
291
- if utils.isSurrogatePair(data[pos:pos + 2]):
292
- # We have a surrogate pair!
293
- char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
294
- if char_val in non_bmp_invalid_codepoints:
295
- self.errors.append("invalid-codepoint")
296
- skip = True
297
- elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
298
- pos == len(data) - 1):
299
- self.errors.append("invalid-codepoint")
300
- else:
301
- skip = False
302
- self.errors.append("invalid-codepoint")
303
-
304
- def charsUntil(self, characters, opposite=False):
305
- """ Returns a string of characters from the stream up to but not
306
- including any character in 'characters' or EOF. 'characters' must be
307
- a container that supports the 'in' method and iteration over its
308
- characters.
309
- """
310
-
311
- # Use a cache of regexps to find the required characters
312
- try:
313
- chars = charsUntilRegEx[(characters, opposite)]
314
- except KeyError:
315
- if __debug__:
316
- for c in characters:
317
- assert(ord(c) < 128)
318
- regex = "".join(["\\x%02x" % ord(c) for c in characters])
319
- if not opposite:
320
- regex = "^%s" % regex
321
- chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
322
-
323
- rv = []
324
-
325
- while True:
326
- # Find the longest matching prefix
327
- m = chars.match(self.chunk, self.chunkOffset)
328
- if m is None:
329
- # If nothing matched, and it wasn't because we ran out of chunk,
330
- # then stop
331
- if self.chunkOffset != self.chunkSize:
332
- break
333
- else:
334
- end = m.end()
335
- # If not the whole chunk matched, return everything
336
- # up to the part that didn't match
337
- if end != self.chunkSize:
338
- rv.append(self.chunk[self.chunkOffset:end])
339
- self.chunkOffset = end
340
- break
341
- # If the whole remainder of the chunk matched,
342
- # use it all and read the next chunk
343
- rv.append(self.chunk[self.chunkOffset:])
344
- if not self.readChunk():
345
- # Reached EOF
346
- break
347
-
348
- r = "".join(rv)
349
- return r
350
-
351
- def unget(self, char):
352
- # Only one character is allowed to be ungotten at once - it must
353
- # be consumed again before any further call to unget
354
- if char is not None:
355
- if self.chunkOffset == 0:
356
- # unget is called quite rarely, so it's a good idea to do
357
- # more work here if it saves a bit of work in the frequently
358
- # called char and charsUntil.
359
- # So, just prepend the ungotten character onto the current
360
- # chunk:
361
- self.chunk = char + self.chunk
362
- self.chunkSize += 1
363
- else:
364
- self.chunkOffset -= 1
365
- assert self.chunk[self.chunkOffset] == char
366
-
367
-
368
- class HTMLBinaryInputStream(HTMLUnicodeInputStream):
369
- """Provides a unicode stream of characters to the HTMLTokenizer.
370
-
371
- This class takes care of character encoding and removing or replacing
372
- incorrect byte-sequences and also provides column and line tracking.
373
-
374
- """
375
-
376
- def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
377
- """Initialises the HTMLInputStream.
378
-
379
- HTMLInputStream(source, [encoding]) -> Normalized stream from source
380
- for use by html5lib.
381
-
382
- source can be either a file-object, local filename or a string.
383
-
384
- The optional encoding parameter must be a string that indicates
385
- the encoding. If specified, that encoding will be used,
386
- regardless of any BOM or later declaration (such as in a meta
387
- element)
388
-
389
- parseMeta - Look for a <meta> element containing encoding information
390
-
391
- """
392
- # Raw Stream - for unicode objects this will encode to utf-8 and set
393
- # self.charEncoding as appropriate
394
- self.rawStream = self.openStream(source)
395
-
396
- HTMLUnicodeInputStream.__init__(self, self.rawStream)
397
-
398
- self.charEncoding = (codecName(encoding), "certain")
399
-
400
- # Encoding Information
401
- # Number of bytes to use when looking for a meta element with
402
- # encoding information
403
- self.numBytesMeta = 512
404
- # Number of bytes to use when using detecting encoding using chardet
405
- self.numBytesChardet = 100
406
- # Encoding to use if no other information can be found
407
- self.defaultEncoding = "windows-1252"
408
-
409
- # Detect encoding iff no explicit "transport level" encoding is supplied
410
- if (self.charEncoding[0] is None):
411
- self.charEncoding = self.detectEncoding(parseMeta, chardet)
412
-
413
- # Call superclass
414
- self.reset()
415
-
416
- def reset(self):
417
- self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
418
- 'replace')
419
- HTMLUnicodeInputStream.reset(self)
420
-
421
- def openStream(self, source):
422
- """Produces a file object from source.
423
-
424
- source can be either a file object, local filename or a string.
425
-
426
- """
427
- # Already a file object
428
- if hasattr(source, 'read'):
429
- stream = source
430
- else:
431
- stream = BytesIO(source)
432
-
433
- try:
434
- stream.seek(stream.tell())
435
- except:
436
- stream = BufferedStream(stream)
437
-
438
- return stream
439
-
440
- def detectEncoding(self, parseMeta=True, chardet=True):
441
- # First look for a BOM
442
- # This will also read past the BOM if present
443
- encoding = self.detectBOM()
444
- confidence = "certain"
445
- # If there is no BOM need to look for meta elements with encoding
446
- # information
447
- if encoding is None and parseMeta:
448
- encoding = self.detectEncodingMeta()
449
- confidence = "tentative"
450
- # Guess with chardet, if avaliable
451
- if encoding is None and chardet:
452
- confidence = "tentative"
453
- try:
454
- try:
455
- from charade.universaldetector import UniversalDetector
456
- except ImportError:
457
- from chardet.universaldetector import UniversalDetector
458
- buffers = []
459
- detector = UniversalDetector()
460
- while not detector.done:
461
- buffer = self.rawStream.read(self.numBytesChardet)
462
- assert isinstance(buffer, bytes)
463
- if not buffer:
464
- break
465
- buffers.append(buffer)
466
- detector.feed(buffer)
467
- detector.close()
468
- encoding = detector.result['encoding']
469
- self.rawStream.seek(0)
470
- except ImportError:
471
- pass
472
- # If all else fails use the default encoding
473
- if encoding is None:
474
- confidence = "tentative"
475
- encoding = self.defaultEncoding
476
-
477
- # Substitute for equivalent encodings:
478
- encodingSub = {"iso-8859-1": "windows-1252"}
479
-
480
- if encoding.lower() in encodingSub:
481
- encoding = encodingSub[encoding.lower()]
482
-
483
- return encoding, confidence
484
-
485
- def changeEncoding(self, newEncoding):
486
- assert self.charEncoding[1] != "certain"
487
- newEncoding = codecName(newEncoding)
488
- if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
489
- newEncoding = "utf-8"
490
- if newEncoding is None:
491
- return
492
- elif newEncoding == self.charEncoding[0]:
493
- self.charEncoding = (self.charEncoding[0], "certain")
494
- else:
495
- self.rawStream.seek(0)
496
- self.reset()
497
- self.charEncoding = (newEncoding, "certain")
498
- raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
499
-
500
- def detectBOM(self):
501
- """Attempts to detect at BOM at the start of the stream. If
502
- an encoding can be determined from the BOM return the name of the
503
- encoding otherwise return None"""
504
- bomDict = {
505
- codecs.BOM_UTF8: 'utf-8',
506
- codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
507
- codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
508
- }
509
-
510
- # Go to beginning of file and read in 4 bytes
511
- string = self.rawStream.read(4)
512
- assert isinstance(string, bytes)
513
-
514
- # Try detecting the BOM using bytes from the string
515
- encoding = bomDict.get(string[:3]) # UTF-8
516
- seek = 3
517
- if not encoding:
518
- # Need to detect UTF-32 before UTF-16
519
- encoding = bomDict.get(string) # UTF-32
520
- seek = 4
521
- if not encoding:
522
- encoding = bomDict.get(string[:2]) # UTF-16
523
- seek = 2
524
-
525
- # Set the read position past the BOM if one was found, otherwise
526
- # set it to the start of the stream
527
- self.rawStream.seek(encoding and seek or 0)
528
-
529
- return encoding
530
-
531
- def detectEncodingMeta(self):
532
- """Report the encoding declared by the meta element
533
- """
534
- buffer = self.rawStream.read(self.numBytesMeta)
535
- assert isinstance(buffer, bytes)
536
- parser = EncodingParser(buffer)
537
- self.rawStream.seek(0)
538
- encoding = parser.getEncoding()
539
-
540
- if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
541
- encoding = "utf-8"
542
-
543
- return encoding
544
-
545
-
546
- class EncodingBytes(bytes):
547
- """String-like object with an associated position and various extra methods
548
- If the position is ever greater than the string length then an exception is
549
- raised"""
550
- def __new__(self, value):
551
- assert isinstance(value, bytes)
552
- return bytes.__new__(self, value.lower())
553
-
554
- def __init__(self, value):
555
- self._position = -1
556
-
557
- def __iter__(self):
558
- return self
559
-
560
- def __next__(self):
561
- p = self._position = self._position + 1
562
- if p >= len(self):
563
- raise StopIteration
564
- elif p < 0:
565
- raise TypeError
566
- return self[p:p + 1]
567
-
568
- def next(self):
569
- # Py2 compat
570
- return self.__next__()
571
-
572
- def previous(self):
573
- p = self._position
574
- if p >= len(self):
575
- raise StopIteration
576
- elif p < 0:
577
- raise TypeError
578
- self._position = p = p - 1
579
- return self[p:p + 1]
580
-
581
- def setPosition(self, position):
582
- if self._position >= len(self):
583
- raise StopIteration
584
- self._position = position
585
-
586
- def getPosition(self):
587
- if self._position >= len(self):
588
- raise StopIteration
589
- if self._position >= 0:
590
- return self._position
591
- else:
592
- return None
593
-
594
- position = property(getPosition, setPosition)
595
-
596
- def getCurrentByte(self):
597
- return self[self.position:self.position + 1]
598
-
599
- currentByte = property(getCurrentByte)
600
-
601
- def skip(self, chars=spaceCharactersBytes):
602
- """Skip past a list of characters"""
603
- p = self.position # use property for the error-checking
604
- while p < len(self):
605
- c = self[p:p + 1]
606
- if c not in chars:
607
- self._position = p
608
- return c
609
- p += 1
610
- self._position = p
611
- return None
612
-
613
- def skipUntil(self, chars):
614
- p = self.position
615
- while p < len(self):
616
- c = self[p:p + 1]
617
- if c in chars:
618
- self._position = p
619
- return c
620
- p += 1
621
- self._position = p
622
- return None
623
-
624
- def matchBytes(self, bytes):
625
- """Look for a sequence of bytes at the start of a string. If the bytes
626
- are found return True and advance the position to the byte after the
627
- match. Otherwise return False and leave the position alone"""
628
- p = self.position
629
- data = self[p:p + len(bytes)]
630
- rv = data.startswith(bytes)
631
- if rv:
632
- self.position += len(bytes)
633
- return rv
634
-
635
- def jumpTo(self, bytes):
636
- """Look for the next sequence of bytes matching a given sequence. If
637
- a match is found advance the position to the last byte of the match"""
638
- newPosition = self[self.position:].find(bytes)
639
- if newPosition > -1:
640
- # XXX: This is ugly, but I can't see a nicer way to fix this.
641
- if self._position == -1:
642
- self._position = 0
643
- self._position += (newPosition + len(bytes) - 1)
644
- return True
645
- else:
646
- raise StopIteration
647
-
648
-
649
- class EncodingParser(object):
650
- """Mini parser for detecting character encoding from meta elements"""
651
-
652
- def __init__(self, data):
653
- """string - the data to work on for encoding detection"""
654
- self.data = EncodingBytes(data)
655
- self.encoding = None
656
-
657
- def getEncoding(self):
658
- methodDispatch = (
659
- (b"<!--", self.handleComment),
660
- (b"<meta", self.handleMeta),
661
- (b"</", self.handlePossibleEndTag),
662
- (b"<!", self.handleOther),
663
- (b"<?", self.handleOther),
664
- (b"<", self.handlePossibleStartTag))
665
- for byte in self.data:
666
- keepParsing = True
667
- for key, method in methodDispatch:
668
- if self.data.matchBytes(key):
669
- try:
670
- keepParsing = method()
671
- break
672
- except StopIteration:
673
- keepParsing = False
674
- break
675
- if not keepParsing:
676
- break
677
-
678
- return self.encoding
679
-
680
- def handleComment(self):
681
- """Skip over comments"""
682
- return self.data.jumpTo(b"-->")
683
-
684
- def handleMeta(self):
685
- if self.data.currentByte not in spaceCharactersBytes:
686
- # if we have <meta not followed by a space so just keep going
687
- return True
688
- # We have a valid meta element we want to search for attributes
689
- hasPragma = False
690
- pendingEncoding = None
691
- while True:
692
- # Try to find the next attribute after the current position
693
- attr = self.getAttribute()
694
- if attr is None:
695
- return True
696
- else:
697
- if attr[0] == b"http-equiv":
698
- hasPragma = attr[1] == b"content-type"
699
- if hasPragma and pendingEncoding is not None:
700
- self.encoding = pendingEncoding
701
- return False
702
- elif attr[0] == b"charset":
703
- tentativeEncoding = attr[1]
704
- codec = codecName(tentativeEncoding)
705
- if codec is not None:
706
- self.encoding = codec
707
- return False
708
- elif attr[0] == b"content":
709
- contentParser = ContentAttrParser(EncodingBytes(attr[1]))
710
- tentativeEncoding = contentParser.parse()
711
- if tentativeEncoding is not None:
712
- codec = codecName(tentativeEncoding)
713
- if codec is not None:
714
- if hasPragma:
715
- self.encoding = codec
716
- return False
717
- else:
718
- pendingEncoding = codec
719
-
720
- def handlePossibleStartTag(self):
721
- return self.handlePossibleTag(False)
722
-
723
- def handlePossibleEndTag(self):
724
- next(self.data)
725
- return self.handlePossibleTag(True)
726
-
727
- def handlePossibleTag(self, endTag):
728
- data = self.data
729
- if data.currentByte not in asciiLettersBytes:
730
- # If the next byte is not an ascii letter either ignore this
731
- # fragment (possible start tag case) or treat it according to
732
- # handleOther
733
- if endTag:
734
- data.previous()
735
- self.handleOther()
736
- return True
737
-
738
- c = data.skipUntil(spacesAngleBrackets)
739
- if c == b"<":
740
- # return to the first step in the overall "two step" algorithm
741
- # reprocessing the < byte
742
- data.previous()
743
- else:
744
- # Read all attributes
745
- attr = self.getAttribute()
746
- while attr is not None:
747
- attr = self.getAttribute()
748
- return True
749
-
750
- def handleOther(self):
751
- return self.data.jumpTo(b">")
752
-
753
- def getAttribute(self):
754
- """Return a name,value pair for the next attribute in the stream,
755
- if one is found, or None"""
756
- data = self.data
757
- # Step 1 (skip chars)
758
- c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
759
- assert c is None or len(c) == 1
760
- # Step 2
761
- if c in (b">", None):
762
- return None
763
- # Step 3
764
- attrName = []
765
- attrValue = []
766
- # Step 4 attribute name
767
- while True:
768
- if c == b"=" and attrName:
769
- break
770
- elif c in spaceCharactersBytes:
771
- # Step 6!
772
- c = data.skip()
773
- break
774
- elif c in (b"/", b">"):
775
- return b"".join(attrName), b""
776
- elif c in asciiUppercaseBytes:
777
- attrName.append(c.lower())
778
- elif c is None:
779
- return None
780
- else:
781
- attrName.append(c)
782
- # Step 5
783
- c = next(data)
784
- # Step 7
785
- if c != b"=":
786
- data.previous()
787
- return b"".join(attrName), b""
788
- # Step 8
789
- next(data)
790
- # Step 9
791
- c = data.skip()
792
- # Step 10
793
- if c in (b"'", b'"'):
794
- # 10.1
795
- quoteChar = c
796
- while True:
797
- # 10.2
798
- c = next(data)
799
- # 10.3
800
- if c == quoteChar:
801
- next(data)
802
- return b"".join(attrName), b"".join(attrValue)
803
- # 10.4
804
- elif c in asciiUppercaseBytes:
805
- attrValue.append(c.lower())
806
- # 10.5
807
- else:
808
- attrValue.append(c)
809
- elif c == b">":
810
- return b"".join(attrName), b""
811
- elif c in asciiUppercaseBytes:
812
- attrValue.append(c.lower())
813
- elif c is None:
814
- return None
815
- else:
816
- attrValue.append(c)
817
- # Step 11
818
- while True:
819
- c = next(data)
820
- if c in spacesAngleBrackets:
821
- return b"".join(attrName), b"".join(attrValue)
822
- elif c in asciiUppercaseBytes:
823
- attrValue.append(c.lower())
824
- elif c is None:
825
- return None
826
- else:
827
- attrValue.append(c)
828
-
829
-
830
- class ContentAttrParser(object):
831
- def __init__(self, data):
832
- assert isinstance(data, bytes)
833
- self.data = data
834
-
835
- def parse(self):
836
- try:
837
- # Check if the attr name is charset
838
- # otherwise return
839
- self.data.jumpTo(b"charset")
840
- self.data.position += 1
841
- self.data.skip()
842
- if not self.data.currentByte == b"=":
843
- # If there is no = sign keep looking for attrs
844
- return None
845
- self.data.position += 1
846
- self.data.skip()
847
- # Look for an encoding between matching quote marks
848
- if self.data.currentByte in (b'"', b"'"):
849
- quoteMark = self.data.currentByte
850
- self.data.position += 1
851
- oldPosition = self.data.position
852
- if self.data.jumpTo(quoteMark):
853
- return self.data[oldPosition:self.data.position]
854
- else:
855
- return None
856
- else:
857
- # Unquoted value
858
- oldPosition = self.data.position
859
- try:
860
- self.data.skipUntil(spaceCharactersBytes)
861
- return self.data[oldPosition:self.data.position]
862
- except StopIteration:
863
- # Return the whole remaining value
864
- return self.data[oldPosition:]
865
- except StopIteration:
866
- return None
867
-
868
-
869
def codecName(encoding):
    """Look up the Python codec registered for an encoding label.

    ``encoding`` may be text or ASCII bytes.  The label is stripped of
    ASCII punctuation/whitespace and lower-cased before being looked up in
    ``encodings``.  Returns None for non-ASCII bytes, an empty or missing
    label, or a label with no entry.
    """
    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None
    if not encoding:
        return None
    stripped = ascii_punctuation_re.sub("", encoding)
    return encodings.get(stripped.lower())
1
+ from __future__ import absolute_import, division, unicode_literals
2
+
3
+ from pip._vendor.six import text_type, binary_type
4
+ from pip._vendor.six.moves import http_client, urllib
5
+
6
+ import codecs
7
+ import re
8
+
9
+ from pip._vendor import webencodings
10
+
11
+ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
12
+ from .constants import _ReparseException
13
+ from . import _utils
14
+
15
+ from io import StringIO
16
+
17
+ try:
18
+ from io import BytesIO
19
+ except ImportError:
20
+ BytesIO = StringIO
21
+
22
# Byte-string counterparts of the character-class constants; the encoding
# pre-parser works on raw bytes rather than text.
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes.union([b">", b"<"])


# Character class of code points reported as "invalid-codepoint", without
# the surrogate range (added below where the platform allows it).
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"  # noqa

if not _utils.supports_lone_surrogates:
    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
else:
    # The surrogate range is produced through eval so that this source
    # file never contains a lone-surrogate literal, which would be illegal
    # on platforms that do not support them.
    assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
    invalid_unicode_re = re.compile("".join([
        invalid_unicode_no_surrogate[:-1],
        eval('"\\uD800-\\uDFFF"'),  # pylint:disable=eval-used
        "]",
    ]))

# U+xFFFE and U+xFFFF for every supplementary plane (1 through 16).
non_bmp_invalid_codepoints = set((plane << 16) | low
                                 for plane in range(0x01, 0x11)
                                 for low in (0xFFFE, 0xFFFF))

ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")

# Compiled-regex cache for charsUntil(), keyed by (characters, opposite)
charsUntilRegEx = {}
55
+
56
+
57
class BufferedStream(object):
    """Buffering for streams that do not have buffering of their own

    Everything read from the wrapped stream is kept as a list of chunks so
    that ``seek`` can rewind to any offset that has already been read.
    The buffer is a list of chunks on the assumption that joining many
    strings will be slow since it is O(n**2).
    """

    def __init__(self, stream):
        self.stream = stream
        self.buffer = []
        # [chunk number, offset inside that chunk]; chunk -1 means that
        # nothing has been read from the underlying stream yet.
        self.position = [-1, 0]

    def tell(self):
        """Return the current absolute offset within the buffered data."""
        chunkIndex, chunkOffset = self.position
        return sum(len(chunk) for chunk in self.buffer[:chunkIndex]) + chunkOffset

    def seek(self, pos):
        """Move to absolute offset ``pos``, which must already be buffered."""
        assert pos <= self._bufferedBytes()
        if not self.buffer:
            # Nothing has been read yet, so offset 0 is the only reachable
            # position; indexing self.buffer[0] below would raise IndexError.
            self.position = [-1, 0]
            return
        offset = pos
        i = 0
        while len(self.buffer[i]) < offset:
            offset -= len(self.buffer[i])
            i += 1
        self.position = [i, offset]

    def read(self, bytes):
        """Return up to ``bytes`` bytes, replaying buffered data first."""
        if not self.buffer:
            return self._readStream(bytes)
        # _readFromBuffer falls through to the underlying stream once the
        # buffered chunks are exhausted, so it covers the "at end of buffer"
        # case as well.
        return self._readFromBuffer(bytes)

    def _bufferedBytes(self):
        """Return the total number of bytes held in the buffer."""
        return sum(len(item) for item in self.buffer)

    def _readStream(self, bytes):
        """Read from the underlying stream and record the data as a new chunk."""
        data = self.stream.read(bytes)
        self.buffer.append(data)
        self.position[0] += 1
        self.position[1] = len(data)
        return data

    def _readFromBuffer(self, bytes):
        """Serve a read from buffered chunks, topping up from the stream."""
        remainingBytes = bytes
        rv = []
        bufferIndex = self.position[0]
        bufferOffset = self.position[1]
        while bufferIndex < len(self.buffer) and remainingBytes != 0:
            assert remainingBytes > 0
            bufferedData = self.buffer[bufferIndex]

            if remainingBytes <= len(bufferedData) - bufferOffset:
                # The request ends inside this chunk.
                bytesToRead = remainingBytes
                self.position = [bufferIndex, bufferOffset + bytesToRead]
            else:
                # Consume the rest of this chunk and move on to the next.
                bytesToRead = len(bufferedData) - bufferOffset
                self.position = [bufferIndex, len(bufferedData)]
                bufferIndex += 1
            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
            remainingBytes -= bytesToRead

            bufferOffset = 0

        if remainingBytes:
            rv.append(self._readStream(remainingBytes))

        return b"".join(rv)
129
+
130
+
131
def HTMLInputStream(source, **kwargs):
    """Build the input stream class that matches the type of ``source``.

    Text sources get an ``HTMLUnicodeInputStream``; everything else gets an
    ``HTMLBinaryInputStream``.  Passing any ``*_encoding`` keyword together
    with a text source raises ``TypeError``.
    """
    # Work around Python bug #20007: read(0) closes the connection.
    # http://bugs.python.org/issue20007
    if isinstance(source, http_client.HTTPResponse):
        isUnicode = False
    elif (isinstance(source, urllib.response.addbase) and
          isinstance(source.fp, http_client.HTTPResponse)):
        # addinfourl wrapping an HTTPResponse
        isUnicode = False
    elif hasattr(source, "read"):
        # A zero-length read reveals whether the file object yields text.
        isUnicode = isinstance(source.read(0), text_type)
    else:
        isUnicode = isinstance(source, text_type)

    if not isUnicode:
        return HTMLBinaryInputStream(source, **kwargs)

    encodings = [x for x in kwargs if x.endswith("_encoding")]
    if encodings:
        raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)

    return HTMLUnicodeInputStream(source, **kwargs)
152
+
153
+
154
class HTMLUnicodeInputStream(object):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    The stream is read in chunks.  Each chunk has its newlines normalised
    to LF, has invalid code points recorded in ``self.errors``, and is
    tracked so that line and column positions can be reported.
    """

    _defaultChunkSize = 10240

    def __init__(self, source):
        """Initialises the HTMLInputStream.

        source is either an object with a ``read`` method returning text,
        or a text string (see ``openStream``).
        """

        if not _utils.supports_lone_surrogates:
            # Such platforms will have already checked for such
            # surrogate errors, so no need to do this checking.
            self.reportCharacterErrors = None
        elif len("\U0010FFFF") == 1:
            self.reportCharacterErrors = self.characterErrorsUCS4
        else:
            self.reportCharacterErrors = self.characterErrorsUCS2

        # List of where new lines occur
        self.newLines = [0]

        self.charEncoding = (lookupEncoding("utf-8"), "certain")
        self.dataStream = self.openStream(source)

        self.reset()

    def reset(self):
        """Discard the current chunk, recorded errors and position state."""
        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0
        self.errors = []

        # number of (complete) lines in previous chunks
        self.prevNumLines = 0
        # number of columns in the last line of the previous chunk
        self.prevNumCols = 0

        # Deal with CR LF and surrogates split over chunk boundaries
        self._bufferedCharacter = None

    def openStream(self, source):
        """Produces a file object from source.

        An object with a ``read`` attribute is returned unchanged; anything
        else is wrapped in a ``StringIO``.
        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = StringIO(source)

        return stream

    def _position(self, offset):
        """Return the zero-based (line, column) of ``offset`` in the chunk."""
        chunk = self.chunk
        nLines = chunk.count('\n', 0, offset)
        positionLine = self.prevNumLines + nLines
        lastLinePos = chunk.rfind('\n', 0, offset)
        if lastLinePos == -1:
            # Still on the line carried over from the previous chunk.
            positionColumn = self.prevNumCols + offset
        else:
            positionColumn = offset - (lastLinePos + 1)
        return (positionLine, positionColumn)

    def position(self):
        """Returns (line, col) of the current position in the stream."""
        line, col = self._position(self.chunkOffset)
        return (line + 1, col)

    def char(self):
        """ Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        """
        # Read a new chunk from the input stream if necessary
        if self.chunkOffset >= self.chunkSize:
            if not self.readChunk():
                return EOF

        chunkOffset = self.chunkOffset
        char = self.chunk[chunkOffset]
        self.chunkOffset = chunkOffset + 1

        return char

    def readChunk(self, chunkSize=None):
        """Load the next chunk; return False when the stream is exhausted."""
        if chunkSize is None:
            chunkSize = self._defaultChunkSize

        # Carry the line/column reached at the end of the old chunk forward.
        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0

        data = self.dataStream.read(chunkSize)

        # Deal with CR LF and surrogates broken across chunks
        if self._bufferedCharacter:
            data = self._bufferedCharacter + data
            self._bufferedCharacter = None
        elif not data:
            # We have no more data, bye-bye stream
            return False

        if len(data) > 1:
            lastv = ord(data[-1])
            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
                # A trailing CR or high surrogate may be half of a pair
                # completed by the next chunk; hold it back until then.
                self._bufferedCharacter = data[-1]
                data = data[:-1]

        if self.reportCharacterErrors:
            self.reportCharacterErrors(data)

        # Normalise newlines
        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        self.chunk = data
        self.chunkSize = len(data)

        return True

    def characterErrorsUCS4(self, data):
        """Record one "invalid-codepoint" error per invalid character."""
        for _ in range(len(invalid_unicode_re.findall(data))):
            self.errors.append("invalid-codepoint")

    def characterErrorsUCS2(self, data):
        """Record "invalid-codepoint" errors when surrogates are code units.

        Both halves of a surrogate pair match ``invalid_unicode_re``, so
        the match for the second half has to be skipped once the pair has
        been examined.
        """
        # Someone picked the wrong compile option
        # You lose
        skip = False
        for match in invalid_unicode_re.finditer(data):
            if skip:
                # This match is the second half of the pair handled on the
                # previous iteration.  Clear the flag so that later matches
                # are examined again rather than all being ignored.
                skip = False
                continue
            codepoint = ord(match.group())
            pos = match.start()
            # Pretty sure there should be endianness issues here
            if _utils.isSurrogatePair(data[pos:pos + 2]):
                # We have a surrogate pair!
                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
                if char_val in non_bmp_invalid_codepoints:
                    self.errors.append("invalid-codepoint")
                skip = True
            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
                  pos == len(data) - 1):
                self.errors.append("invalid-codepoint")
            else:
                skip = False
                self.errors.append("invalid-codepoint")

    def charsUntil(self, characters, opposite=False):
        """ Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.

        With ``opposite`` true the sense is inverted: the returned run
        consists only of characters that are in 'characters'.
        """

        # Use a cache of regexps to find the required characters
        try:
            chars = charsUntilRegEx[(characters, opposite)]
        except KeyError:
            if __debug__:
                for c in characters:
                    assert(ord(c) < 128)
            regex = "".join(["\\x%02x" % ord(c) for c in characters])
            if not opposite:
                regex = "^%s" % regex
            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

        rv = []

        while True:
            # Find the longest matching prefix
            m = chars.match(self.chunk, self.chunkOffset)
            if m is None:
                # If nothing matched, and it wasn't because we ran out of chunk,
                # then stop
                if self.chunkOffset != self.chunkSize:
                    break
            else:
                end = m.end()
                # If not the whole chunk matched, return everything
                # up to the part that didn't match
                if end != self.chunkSize:
                    rv.append(self.chunk[self.chunkOffset:end])
                    self.chunkOffset = end
                    break
            # If the whole remainder of the chunk matched,
            # use it all and read the next chunk
            rv.append(self.chunk[self.chunkOffset:])
            if not self.readChunk():
                # Reached EOF
                break

        r = "".join(rv)
        return r

    def unget(self, char):
        """Push ``char`` back so the next read returns it; None is ignored."""
        # Only one character is allowed to be ungotten at once - it must
        # be consumed again before any further call to unget
        if char is not None:
            if self.chunkOffset == 0:
                # unget is called quite rarely, so it's a good idea to do
                # more work here if it saves a bit of work in the frequently
                # called char and charsUntil.
                # So, just prepend the ungotten character onto the current
                # chunk:
                self.chunk = char + self.chunk
                self.chunkSize += 1
            else:
                self.chunkOffset -= 1
                assert self.chunk[self.chunkOffset] == char
382
+
383
+
384
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    The byte source is decoded with an encoding chosen by
    ``determineEncoding``; undecodable sequences are replaced (the stream
    reader is created with the 'replace' error handler).  Newline
    normalisation and position tracking come from the base class.
    """

    def __init__(self, source, override_encoding=None, transport_encoding=None,
                 same_origin_parent_encoding=None, likely_encoding=None,
                 default_encoding="windows-1252", useChardet=True):
        """Initialises the HTMLInputStream.

        source is either an object with a ``read`` method returning bytes,
        or a bytes string (see ``openStream``).

        The ``*_encoding`` arguments are encoding labels consulted by
        ``determineEncoding`` in this order, after a BOM check:
        override_encoding, transport_encoding, a <meta> declaration found
        in the data, same_origin_parent_encoding, likely_encoding, chardet
        (only if useChardet is true and the module imports), and finally
        default_encoding.
        """
        # Raw Stream - for unicode objects this will encode to utf-8 and set
        #              self.charEncoding as appropriate
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 1024
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Things from args
        self.override_encoding = override_encoding
        self.transport_encoding = transport_encoding
        self.same_origin_parent_encoding = same_origin_parent_encoding
        self.likely_encoding = likely_encoding
        self.default_encoding = default_encoding

        # Determine encoding
        self.charEncoding = self.determineEncoding(useChardet)
        assert self.charEncoding[0] is not None

        # Call superclass
        self.reset()

    def reset(self):
        """Rebuild the decoding reader for the current encoding and reset."""
        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a seekable file object from source.

        An object with a ``read`` attribute is used as is and anything else
        is wrapped in a ``BytesIO``.  If the result cannot ``tell``/``seek``
        it is wrapped in a ``BufferedStream``.
        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        try:
            stream.seek(stream.tell())
        except Exception:
            # Any failure means the stream is not usably seekable.  Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            stream = BufferedStream(stream)

        return stream

    def determineEncoding(self, chardet=True):
        """Return an ``(encoding, confidence)`` pair for the raw stream.

        ``confidence`` is "certain" or "tentative".
        """
        # BOMs take precedence over everything
        # This will also read past the BOM if present
        charEncoding = self.detectBOM(), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # If we've been overriden, we've been overriden
        charEncoding = lookupEncoding(self.override_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Now check the transport layer
        charEncoding = lookupEncoding(self.transport_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Look for meta elements with encoding information
        charEncoding = self.detectEncodingMeta(), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Parent document encoding
        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
            return charEncoding

        # "likely" encoding
        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Guess with chardet, if available
        if chardet:
            try:
                from pip._vendor.chardet.universaldetector import UniversalDetector
            except ImportError:
                pass
            else:
                detector = UniversalDetector()
                while not detector.done:
                    buffer = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(buffer, bytes)
                    if not buffer:
                        break
                    detector.feed(buffer)
                detector.close()
                encoding = lookupEncoding(detector.result['encoding'])
                # Rewind so that decoding starts from the beginning again.
                self.rawStream.seek(0)
                if encoding is not None:
                    return encoding, "tentative"

        # Try the default encoding
        charEncoding = lookupEncoding(self.default_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Fallback to html5lib's default if even that hasn't worked
        return lookupEncoding("windows-1252"), "tentative"

    def changeEncoding(self, newEncoding):
        """Switch to ``newEncoding`` after a tentative guess.

        Unknown labels are ignored.  If the encoding actually differs from
        the current one, the raw stream is rewound, the reader is rebuilt
        and ``_ReparseException`` is raised.
        """
        assert self.charEncoding[1] != "certain"
        newEncoding = lookupEncoding(newEncoding)
        if newEncoding is None:
            return
        if newEncoding.name in ("utf-16be", "utf-16le"):
            # NOTE(review): after this substitution nothing else happens on
            # this branch (no comparison with the current encoding and no
            # reparse) - confirm that this is intended.
            newEncoding = lookupEncoding("utf-8")
            assert newEncoding is not None
        elif newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            # Remember the outgoing encoding before it is overwritten so
            # that the exception message names both encodings correctly.
            oldEncoding = self.charEncoding[0]
            self.rawStream.seek(0)
            self.charEncoding = (newEncoding, "certain")
            self.reset()
            raise _ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))

    def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the result of
        ``lookupEncoding`` for it, otherwise return None.  The raw stream
        is left just past the BOM, or at offset 0 if there is none."""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Read in the first 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])         # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)         # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element

        Only the first ``numBytesMeta`` bytes are examined and the raw
        stream is rewound afterwards.  A declared utf-16be/utf-16le is
        reported as utf-8.
        """
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
            encoding = lookupEncoding("utf-8")

        return encoding
582
+
583
+
584
class EncodingBytes(bytes):
    """Lower-cased byte string carrying a cursor and scanning helpers.

    The cursor starts at -1 (before the first byte).  Once it has reached
    or passed the end of the data, reading or assigning ``position``
    raises StopIteration."""
    def __new__(cls, value):
        assert isinstance(value, bytes)
        return bytes.__new__(cls, value.lower())

    def __init__(self, value):
        # pylint:disable=unused-argument
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        """Advance the cursor and return the byte it now points at."""
        self._position += 1
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        return self[index:index + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        """Step the cursor back one byte and return the byte there."""
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        index -= 1
        self._position = index
        return self[index:index + 1]

    def setPosition(self, position):
        # The check is made against the cursor's current value, not the
        # value being assigned.
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        current = self._position
        if current >= len(self):
            raise StopIteration
        return current if current >= 0 else None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        index = self.position
        return self[index:index + 1]

    currentByte = property(getCurrentByte)

    def skip(self, chars=spaceCharactersBytes):
        """Skip past a list of characters

        Returns the first byte that is not in ``chars`` and leaves the
        cursor on it, or returns None with the cursor at the end."""
        start = self.position  # use property for the error-checking
        size = len(self)
        for index in range(start, size):
            byte = self[index:index + 1]
            if byte not in chars:
                self._position = index
                return byte
        self._position = size
        return None

    def skipUntil(self, chars):
        """Advance to the first byte in ``chars`` and return it.

        Returns None, with the cursor at the end, if no such byte exists."""
        start = self.position
        size = len(self)
        for index in range(start, size):
            byte = self[index:index + 1]
            if byte in chars:
                self._position = index
                return byte
        self._position = size
        return None

    def matchBytes(self, bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
        start = self.position
        matched = self[start:start + len(bytes)].startswith(bytes)
        if matched:
            self.position += len(bytes)
        return matched

    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match
        and return True; otherwise raise StopIteration"""
        found = self[self.position:].find(bytes)
        if found < 0:
            raise StopIteration
        # XXX: This is ugly, but I can't see a nicer way to fix this.
        if self._position == -1:
            self._position = 0
        self._position += found + len(bytes) - 1
        return True
686
+
687
+
688
class EncodingParser(object):
    """Mini parser for detecting character encoding from meta elements"""

    def __init__(self, data):
        """data - the bytes to examine for an encoding declaration"""
        self.data = EncodingBytes(data)
        self.encoding = None

    def getEncoding(self):
        """Scan the data and return the declared encoding, or None."""
        # Order matters: longer prefixes have to be tried before "<".
        handlers = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepParsing = True
            for prefix, handler in handlers:
                if not self.data.matchBytes(prefix):
                    continue
                try:
                    keepParsing = handler()
                except StopIteration:
                    # A handler ran off the end of the data.
                    keepParsing = False
                break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        """Inspect a <meta> tag; return False once an encoding is chosen."""
        if self.data.currentByte not in spaceCharactersBytes:
            # "<meta" not followed by a space is some other tag: keep going
            return True
        # We have a valid meta element we want to search for attributes
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            name, value = attr
            if name == b"http-equiv":
                hasPragma = value == b"content-type"
                if hasPragma and pendingEncoding is not None:
                    self.encoding = pendingEncoding
                    return False
            elif name == b"charset":
                codec = lookupEncoding(value)
                if codec is not None:
                    self.encoding = codec
                    return False
            elif name == b"content":
                tentativeEncoding = ContentAttrParser(EncodingBytes(value)).parse()
                if tentativeEncoding is not None:
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        if hasPragma:
                            self.encoding = codec
                            return False
                        # Only usable if an http-equiv pragma turns up later.
                        pendingEncoding = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        """Skip over a start or end tag and its attributes."""
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        stopByte = data.skipUntil(spacesAngleBrackets)
        if stopByte == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
            return True

        # Read all attributes
        while self.getAttribute() is not None:
            pass
        return True

    def handleOther(self):
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None"""
        data = self.data
        nameParts = []
        valueParts = []

        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Steps 3 and 4: collect the attribute name
        while True:
            if c == b"=" and nameParts:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(nameParts), b""
            elif c in asciiUppercaseBytes:
                nameParts.append(c.lower())
            elif c is None:
                return None
            else:
                nameParts.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            data.previous()
            return b"".join(nameParts), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(nameParts), b"".join(valueParts)
                # 10.4 / 10.5
                valueParts.append(c.lower() if c in asciiUppercaseBytes else c)
        elif c == b">":
            return b"".join(nameParts), b""
        elif c in asciiUppercaseBytes:
            valueParts.append(c.lower())
        elif c is None:
            return None
        else:
            valueParts.append(c)
        # Step 11: the rest of an unquoted value
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(nameParts), b"".join(valueParts)
            elif c in asciiUppercaseBytes:
                valueParts.append(c.lower())
            elif c is None:
                return None
            else:
                valueParts.append(c)
867
+
868
+
869
class ContentAttrParser(object):
    """Extract the ``charset=...`` value from a meta ``content`` attribute.

    ``data`` must be a ``bytes`` subclass providing the cursor API used
    below (``jumpTo``, ``skip``, ``skipUntil``, ``position`` and
    ``currentByte``).
    """

    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        """Return the raw bytes of the charset value, or None.

        None is returned when ``charset`` does not occur, when it is not
        followed by ``=``, or when a quoted value is never closed.
        """
        data = self.data
        try:
            # Land on the last byte of "charset", then step past it and any
            # whitespace that follows.
            data.jumpTo(b"charset")
            data.position += 1
            data.skip()
            if data.currentByte != b"=":
                # No "=" after the name, so there is no value to extract.
                return None
            data.position += 1
            data.skip()

            opener = data.currentByte
            if opener in (b'"', b"'"):
                # Quoted value: everything up to the matching quote mark.
                data.position += 1
                start = data.position
                if data.jumpTo(opener):
                    return data[start:data.position]
                return None

            # Unquoted value: runs until whitespace or the end of the data.
            start = data.position
            try:
                data.skipUntil(spaceCharactersBytes)
                return data[start:data.position]
            except StopIteration:
                # The cursor ran off the end: the value is the remainder.
                return data[start:]
        except StopIteration:
            return None
906
+
907
+
908
def lookupEncoding(encoding):
    """Resolve an encoding label through ``webencodings.lookup``.

    ``encoding`` may be text, ASCII bytes or None.  Returns the result of
    ``webencodings.lookup``, or None when the label is None, is not ASCII,
    or the lookup raises AttributeError."""
    if isinstance(encoding, binary_type):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None

    if encoding is None:
        return None
    try:
        return webencodings.lookup(encoding)
    except AttributeError:
        return None