polyfile-weave 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polyfile-weave might be problematic. Click here for more details.

Files changed (585) hide show
  1. polyfile/__init__.py +15 -0
  2. polyfile/__main__.py +394 -0
  3. polyfile/arithmetic.py +27 -0
  4. polyfile/ast.py +114 -0
  5. polyfile/debugger.py +1039 -0
  6. polyfile/expressions.py +346 -0
  7. polyfile/fileutils.py +343 -0
  8. polyfile/html.py +135 -0
  9. polyfile/http/__init__.py +1 -0
  10. polyfile/http/defacto.py +37 -0
  11. polyfile/http/deprecated.py +51 -0
  12. polyfile/http/experimental.py +67 -0
  13. polyfile/http/http_11.py +548 -0
  14. polyfile/http/matcher.py +37 -0
  15. polyfile/http/structured_headers.py +48 -0
  16. polyfile/iterators.py +72 -0
  17. polyfile/jpeg.py +24 -0
  18. polyfile/kaitai/__init__.py +0 -0
  19. polyfile/kaitai/compiler.py +156 -0
  20. polyfile/kaitai/parser.py +312 -0
  21. polyfile/kaitai/parsers/__init__.py +0 -0
  22. polyfile/kaitai/parsers/aix_utmp.py +116 -0
  23. polyfile/kaitai/parsers/allegro_dat.py +367 -0
  24. polyfile/kaitai/parsers/andes_firmware.py +64 -0
  25. polyfile/kaitai/parsers/android_bootldr_asus.py +105 -0
  26. polyfile/kaitai/parsers/android_bootldr_huawei.py +181 -0
  27. polyfile/kaitai/parsers/android_bootldr_qcom.py +217 -0
  28. polyfile/kaitai/parsers/android_dto.py +138 -0
  29. polyfile/kaitai/parsers/android_img.py +319 -0
  30. polyfile/kaitai/parsers/android_nanoapp_header.py +83 -0
  31. polyfile/kaitai/parsers/android_opengl_shaders_cache.py +151 -0
  32. polyfile/kaitai/parsers/android_sparse.py +237 -0
  33. polyfile/kaitai/parsers/android_super.py +401 -0
  34. polyfile/kaitai/parsers/apm_partition_table.py +196 -0
  35. polyfile/kaitai/parsers/apple_single_double.py +180 -0
  36. polyfile/kaitai/parsers/asn1_der.py +235 -0
  37. polyfile/kaitai/parsers/au.py +138 -0
  38. polyfile/kaitai/parsers/avantes_roh60.py +112 -0
  39. polyfile/kaitai/parsers/avi.py +296 -0
  40. polyfile/kaitai/parsers/bcd.py +111 -0
  41. polyfile/kaitai/parsers/bitcoin_transaction.py +210 -0
  42. polyfile/kaitai/parsers/blender_blend.py +334 -0
  43. polyfile/kaitai/parsers/bmp.py +780 -0
  44. polyfile/kaitai/parsers/bson.py +411 -0
  45. polyfile/kaitai/parsers/btrfs_stream.py +318 -0
  46. polyfile/kaitai/parsers/bytes_with_io.py +27 -0
  47. polyfile/kaitai/parsers/chrome_pak.py +194 -0
  48. polyfile/kaitai/parsers/code_6502.py +456 -0
  49. polyfile/kaitai/parsers/compressed_resource.py +217 -0
  50. polyfile/kaitai/parsers/cpio_old_le.py +154 -0
  51. polyfile/kaitai/parsers/cramfs.py +344 -0
  52. polyfile/kaitai/parsers/creative_voice_file.py +342 -0
  53. polyfile/kaitai/parsers/dbf.py +274 -0
  54. polyfile/kaitai/parsers/dcmp_0.py +664 -0
  55. polyfile/kaitai/parsers/dcmp_1.py +422 -0
  56. polyfile/kaitai/parsers/dcmp_2.py +312 -0
  57. polyfile/kaitai/parsers/dcmp_variable_length_integer.py +66 -0
  58. polyfile/kaitai/parsers/dex.py +1086 -0
  59. polyfile/kaitai/parsers/dicom.py +4370 -0
  60. polyfile/kaitai/parsers/dime_message.py +201 -0
  61. polyfile/kaitai/parsers/dns_packet.py +569 -0
  62. polyfile/kaitai/parsers/doom_wad.py +654 -0
  63. polyfile/kaitai/parsers/dos_datetime.py +191 -0
  64. polyfile/kaitai/parsers/dos_mz.py +172 -0
  65. polyfile/kaitai/parsers/ds_store.py +513 -0
  66. polyfile/kaitai/parsers/dtb.py +310 -0
  67. polyfile/kaitai/parsers/dune_2_pak.py +126 -0
  68. polyfile/kaitai/parsers/edid.py +472 -0
  69. polyfile/kaitai/parsers/efivar_signature_list.py +331 -0
  70. polyfile/kaitai/parsers/elf.py +2482 -0
  71. polyfile/kaitai/parsers/ethernet_frame.py +114 -0
  72. polyfile/kaitai/parsers/exif.py +723 -0
  73. polyfile/kaitai/parsers/ext2.py +537 -0
  74. polyfile/kaitai/parsers/fallout2_dat.py +187 -0
  75. polyfile/kaitai/parsers/fallout_dat.py +156 -0
  76. polyfile/kaitai/parsers/fasttracker_xm_module.py +558 -0
  77. polyfile/kaitai/parsers/ftl_dat.py +90 -0
  78. polyfile/kaitai/parsers/genmidi_op2.py +161 -0
  79. polyfile/kaitai/parsers/gettext_mo.py +541 -0
  80. polyfile/kaitai/parsers/gif.py +492 -0
  81. polyfile/kaitai/parsers/gimp_brush.py +244 -0
  82. polyfile/kaitai/parsers/glibc_utmp.py +114 -0
  83. polyfile/kaitai/parsers/gltf_binary.py +132 -0
  84. polyfile/kaitai/parsers/google_protobuf.py +151 -0
  85. polyfile/kaitai/parsers/gpt_partition_table.py +175 -0
  86. polyfile/kaitai/parsers/gran_turismo_vol.py +140 -0
  87. polyfile/kaitai/parsers/grub2_font.py +337 -0
  88. polyfile/kaitai/parsers/gzip.py +232 -0
  89. polyfile/kaitai/parsers/hashcat_restore.py +60 -0
  90. polyfile/kaitai/parsers/hccap.py +111 -0
  91. polyfile/kaitai/parsers/hccapx.py +103 -0
  92. polyfile/kaitai/parsers/heaps_pak.py +177 -0
  93. polyfile/kaitai/parsers/heroes_of_might_and_magic_agg.py +116 -0
  94. polyfile/kaitai/parsers/heroes_of_might_and_magic_bmp.py +34 -0
  95. polyfile/kaitai/parsers/icmp_packet.py +136 -0
  96. polyfile/kaitai/parsers/ico.py +129 -0
  97. polyfile/kaitai/parsers/id3v1_1.py +220 -0
  98. polyfile/kaitai/parsers/id3v2_3.py +324 -0
  99. polyfile/kaitai/parsers/id3v2_4.py +423 -0
  100. polyfile/kaitai/parsers/ines.py +282 -0
  101. polyfile/kaitai/parsers/ipv4_packet.py +158 -0
  102. polyfile/kaitai/parsers/ipv6_packet.py +55 -0
  103. polyfile/kaitai/parsers/iso9660.py +544 -0
  104. polyfile/kaitai/parsers/java_class.py +1113 -0
  105. polyfile/kaitai/parsers/jpeg.py +361 -0
  106. polyfile/kaitai/parsers/luks.py +149 -0
  107. polyfile/kaitai/parsers/lzh.py +165 -0
  108. polyfile/kaitai/parsers/mac_os_resource_snd.py +493 -0
  109. polyfile/kaitai/parsers/mach_o.py +3033 -0
  110. polyfile/kaitai/parsers/mach_o_fat.py +92 -0
  111. polyfile/kaitai/parsers/magicavoxel_vox.py +391 -0
  112. polyfile/kaitai/parsers/manifest.json +1 -0
  113. polyfile/kaitai/parsers/mbr_partition_table.py +119 -0
  114. polyfile/kaitai/parsers/mcap.py +1015 -0
  115. polyfile/kaitai/parsers/microsoft_cfb.py +293 -0
  116. polyfile/kaitai/parsers/microsoft_network_monitor_v2.py +309 -0
  117. polyfile/kaitai/parsers/microsoft_pe.py +765 -0
  118. polyfile/kaitai/parsers/mifare_classic.py +706 -0
  119. polyfile/kaitai/parsers/minecraft_nbt.py +449 -0
  120. polyfile/kaitai/parsers/monomakh_sapr_chg.py +69 -0
  121. polyfile/kaitai/parsers/mozilla_mar.py +239 -0
  122. polyfile/kaitai/parsers/mp4.py +333 -0
  123. polyfile/kaitai/parsers/msgpack.py +467 -0
  124. polyfile/kaitai/parsers/nitf.py +1189 -0
  125. polyfile/kaitai/parsers/nt_mdt_pal.py +155 -0
  126. polyfile/kaitai/parsers/ogg.py +118 -0
  127. polyfile/kaitai/parsers/openpgp_message.py +993 -0
  128. polyfile/kaitai/parsers/packet_ppi.py +515 -0
  129. polyfile/kaitai/parsers/pcap.py +344 -0
  130. polyfile/kaitai/parsers/pcf_font.py +506 -0
  131. polyfile/kaitai/parsers/pcx.py +195 -0
  132. polyfile/kaitai/parsers/pcx_dcx.py +79 -0
  133. polyfile/kaitai/parsers/phar_without_stub.py +399 -0
  134. polyfile/kaitai/parsers/php_serialized_value.py +505 -0
  135. polyfile/kaitai/parsers/png.py +721 -0
  136. polyfile/kaitai/parsers/protocol_body.py +260 -0
  137. polyfile/kaitai/parsers/psx_tim.py +104 -0
  138. polyfile/kaitai/parsers/python_pickle.py +718 -0
  139. polyfile/kaitai/parsers/python_pyc_27.py +510 -0
  140. polyfile/kaitai/parsers/quake_mdl.py +441 -0
  141. polyfile/kaitai/parsers/quake_pak.py +112 -0
  142. polyfile/kaitai/parsers/quicktime_mov.py +634 -0
  143. polyfile/kaitai/parsers/rar.py +265 -0
  144. polyfile/kaitai/parsers/regf.py +569 -0
  145. polyfile/kaitai/parsers/renderware_binary_stream.py +877 -0
  146. polyfile/kaitai/parsers/resource_fork.py +611 -0
  147. polyfile/kaitai/parsers/respack.py +57 -0
  148. polyfile/kaitai/parsers/riff.py +409 -0
  149. polyfile/kaitai/parsers/rpm.py +964 -0
  150. polyfile/kaitai/parsers/rtcp_payload.py +579 -0
  151. polyfile/kaitai/parsers/rtp_packet.py +150 -0
  152. polyfile/kaitai/parsers/rtpdump.py +115 -0
  153. polyfile/kaitai/parsers/ruby_marshal.py +423 -0
  154. polyfile/kaitai/parsers/s3m.py +493 -0
  155. polyfile/kaitai/parsers/saints_row_2_vpp_pc.py +254 -0
  156. polyfile/kaitai/parsers/shapefile_index.py +174 -0
  157. polyfile/kaitai/parsers/shapefile_main.py +893 -0
  158. polyfile/kaitai/parsers/some_ip.py +209 -0
  159. polyfile/kaitai/parsers/some_ip_container.py +37 -0
  160. polyfile/kaitai/parsers/some_ip_sd.py +86 -0
  161. polyfile/kaitai/parsers/some_ip_sd_entries.py +160 -0
  162. polyfile/kaitai/parsers/some_ip_sd_options.py +374 -0
  163. polyfile/kaitai/parsers/specpr.py +404 -0
  164. polyfile/kaitai/parsers/sqlite3.py +472 -0
  165. polyfile/kaitai/parsers/ssh_public_key.py +252 -0
  166. polyfile/kaitai/parsers/standard_midi_file.py +390 -0
  167. polyfile/kaitai/parsers/stl.py +111 -0
  168. polyfile/kaitai/parsers/sudoers_ts.py +201 -0
  169. polyfile/kaitai/parsers/swf.py +406 -0
  170. polyfile/kaitai/parsers/systemd_journal.py +361 -0
  171. polyfile/kaitai/parsers/tcp_segment.py +57 -0
  172. polyfile/kaitai/parsers/tga.py +213 -0
  173. polyfile/kaitai/parsers/tls_client_hello.py +293 -0
  174. polyfile/kaitai/parsers/tr_dos_image.py +322 -0
  175. polyfile/kaitai/parsers/tsm.py +198 -0
  176. polyfile/kaitai/parsers/ttf.py +1847 -0
  177. polyfile/kaitai/parsers/udp_datagram.py +42 -0
  178. polyfile/kaitai/parsers/uefi_te.py +236 -0
  179. polyfile/kaitai/parsers/uimage.py +198 -0
  180. polyfile/kaitai/parsers/utf8_string.py +137 -0
  181. polyfile/kaitai/parsers/vfat.py +410 -0
  182. polyfile/kaitai/parsers/vlq_base128_be.py +104 -0
  183. polyfile/kaitai/parsers/vlq_base128_le.py +129 -0
  184. polyfile/kaitai/parsers/vmware_vmdk.py +167 -0
  185. polyfile/kaitai/parsers/vp8_ivf.py +112 -0
  186. polyfile/kaitai/parsers/warcraft_2_pud.py +423 -0
  187. polyfile/kaitai/parsers/wav.py +1014 -0
  188. polyfile/kaitai/parsers/websocket.py +167 -0
  189. polyfile/kaitai/parsers/windows_evt_log.py +304 -0
  190. polyfile/kaitai/parsers/windows_lnk_file.py +467 -0
  191. polyfile/kaitai/parsers/windows_minidump.py +575 -0
  192. polyfile/kaitai/parsers/windows_resource_file.py +243 -0
  193. polyfile/kaitai/parsers/windows_shell_items.py +190 -0
  194. polyfile/kaitai/parsers/windows_systemtime.py +52 -0
  195. polyfile/kaitai/parsers/wmf.py +502 -0
  196. polyfile/kaitai/parsers/xar.py +181 -0
  197. polyfile/kaitai/parsers/xwd.py +189 -0
  198. polyfile/kaitai/parsers/zip.py +685 -0
  199. polyfile/kaitai/parsers/zisofs.py +158 -0
  200. polyfile/kaitai/parsers/zx_spectrum_tap.py +184 -0
  201. polyfile/kaitaimatcher.py +113 -0
  202. polyfile/languagematcher.py +217 -0
  203. polyfile/logger.py +135 -0
  204. polyfile/magic.py +2983 -0
  205. polyfile/magic_defs/COPYING +29 -0
  206. polyfile/magic_defs/__init__.py +0 -0
  207. polyfile/magic_defs/acorn +102 -0
  208. polyfile/magic_defs/adi +13 -0
  209. polyfile/magic_defs/adventure +122 -0
  210. polyfile/magic_defs/aes +29 -0
  211. polyfile/magic_defs/algol68 +35 -0
  212. polyfile/magic_defs/allegro +9 -0
  213. polyfile/magic_defs/alliant +18 -0
  214. polyfile/magic_defs/alpha +32 -0
  215. polyfile/magic_defs/amanda +12 -0
  216. polyfile/magic_defs/amigaos +218 -0
  217. polyfile/magic_defs/android +259 -0
  218. polyfile/magic_defs/animation +1197 -0
  219. polyfile/magic_defs/aout +46 -0
  220. polyfile/magic_defs/apache +28 -0
  221. polyfile/magic_defs/apl +7 -0
  222. polyfile/magic_defs/apple +773 -0
  223. polyfile/magic_defs/application +7 -0
  224. polyfile/magic_defs/applix +13 -0
  225. polyfile/magic_defs/apt +52 -0
  226. polyfile/magic_defs/archive +2586 -0
  227. polyfile/magic_defs/aria +38 -0
  228. polyfile/magic_defs/arm +50 -0
  229. polyfile/magic_defs/asf +132 -0
  230. polyfile/magic_defs/assembler +18 -0
  231. polyfile/magic_defs/asterix +18 -0
  232. polyfile/magic_defs/att3b +41 -0
  233. polyfile/magic_defs/audio +1291 -0
  234. polyfile/magic_defs/avm +33 -0
  235. polyfile/magic_defs/basis +18 -0
  236. polyfile/magic_defs/beetle +7 -0
  237. polyfile/magic_defs/ber +65 -0
  238. polyfile/magic_defs/bflt +14 -0
  239. polyfile/magic_defs/bhl +10 -0
  240. polyfile/magic_defs/bioinformatics +178 -0
  241. polyfile/magic_defs/biosig +154 -0
  242. polyfile/magic_defs/blackberry +8 -0
  243. polyfile/magic_defs/blcr +25 -0
  244. polyfile/magic_defs/blender +50 -0
  245. polyfile/magic_defs/blit +24 -0
  246. polyfile/magic_defs/bm +10 -0
  247. polyfile/magic_defs/bout +11 -0
  248. polyfile/magic_defs/bsdi +33 -0
  249. polyfile/magic_defs/bsi +10 -0
  250. polyfile/magic_defs/btsnoop +13 -0
  251. polyfile/magic_defs/burp +7 -0
  252. polyfile/magic_defs/bytecode +41 -0
  253. polyfile/magic_defs/c-lang +110 -0
  254. polyfile/magic_defs/c64 +531 -0
  255. polyfile/magic_defs/cad +437 -0
  256. polyfile/magic_defs/cafebabe +107 -0
  257. polyfile/magic_defs/cbor +21 -0
  258. polyfile/magic_defs/ccf +14 -0
  259. polyfile/magic_defs/cddb +12 -0
  260. polyfile/magic_defs/chord +15 -0
  261. polyfile/magic_defs/cisco +12 -0
  262. polyfile/magic_defs/citrus +12 -0
  263. polyfile/magic_defs/clarion +27 -0
  264. polyfile/magic_defs/claris +48 -0
  265. polyfile/magic_defs/clipper +65 -0
  266. polyfile/magic_defs/clojure +30 -0
  267. polyfile/magic_defs/coff +98 -0
  268. polyfile/magic_defs/commands +201 -0
  269. polyfile/magic_defs/communications +22 -0
  270. polyfile/magic_defs/compress +461 -0
  271. polyfile/magic_defs/console +1213 -0
  272. polyfile/magic_defs/convex +69 -0
  273. polyfile/magic_defs/coverage +91 -0
  274. polyfile/magic_defs/cracklib +14 -0
  275. polyfile/magic_defs/crypto +31 -0
  276. polyfile/magic_defs/csv +8 -0
  277. polyfile/magic_defs/ctags +6 -0
  278. polyfile/magic_defs/ctf +23 -0
  279. polyfile/magic_defs/cubemap +8 -0
  280. polyfile/magic_defs/cups +56 -0
  281. polyfile/magic_defs/dact +11 -0
  282. polyfile/magic_defs/database +886 -0
  283. polyfile/magic_defs/dataone +47 -0
  284. polyfile/magic_defs/dbpf +15 -0
  285. polyfile/magic_defs/der +146 -0
  286. polyfile/magic_defs/diamond +12 -0
  287. polyfile/magic_defs/dif +33 -0
  288. polyfile/magic_defs/diff +41 -0
  289. polyfile/magic_defs/digital +59 -0
  290. polyfile/magic_defs/dolby +69 -0
  291. polyfile/magic_defs/dsf +25 -0
  292. polyfile/magic_defs/dump +96 -0
  293. polyfile/magic_defs/dwarfs +45 -0
  294. polyfile/magic_defs/dyadic +61 -0
  295. polyfile/magic_defs/ebml +8 -0
  296. polyfile/magic_defs/edid +11 -0
  297. polyfile/magic_defs/editors +43 -0
  298. polyfile/magic_defs/efi +15 -0
  299. polyfile/magic_defs/elf +379 -0
  300. polyfile/magic_defs/encore +22 -0
  301. polyfile/magic_defs/epoc +62 -0
  302. polyfile/magic_defs/erlang +21 -0
  303. polyfile/magic_defs/espressif +57 -0
  304. polyfile/magic_defs/esri +28 -0
  305. polyfile/magic_defs/etf +33 -0
  306. polyfile/magic_defs/fcs +9 -0
  307. polyfile/magic_defs/filesystems +2694 -0
  308. polyfile/magic_defs/finger +16 -0
  309. polyfile/magic_defs/firmware +133 -0
  310. polyfile/magic_defs/flash +62 -0
  311. polyfile/magic_defs/flif +36 -0
  312. polyfile/magic_defs/fonts +449 -0
  313. polyfile/magic_defs/forth +82 -0
  314. polyfile/magic_defs/fortran +9 -0
  315. polyfile/magic_defs/frame +62 -0
  316. polyfile/magic_defs/freebsd +164 -0
  317. polyfile/magic_defs/fsav +128 -0
  318. polyfile/magic_defs/fusecompress +12 -0
  319. polyfile/magic_defs/games +696 -0
  320. polyfile/magic_defs/gcc +17 -0
  321. polyfile/magic_defs/gconv +10 -0
  322. polyfile/magic_defs/gentoo +85 -0
  323. polyfile/magic_defs/geo +166 -0
  324. polyfile/magic_defs/geos +20 -0
  325. polyfile/magic_defs/gimp +77 -0
  326. polyfile/magic_defs/git +13 -0
  327. polyfile/magic_defs/glibc +21 -0
  328. polyfile/magic_defs/gnome +59 -0
  329. polyfile/magic_defs/gnu +173 -0
  330. polyfile/magic_defs/gnumeric +8 -0
  331. polyfile/magic_defs/gpt +240 -0
  332. polyfile/magic_defs/gpu +28 -0
  333. polyfile/magic_defs/grace +21 -0
  334. polyfile/magic_defs/graphviz +12 -0
  335. polyfile/magic_defs/gringotts +48 -0
  336. polyfile/magic_defs/guile +13 -0
  337. polyfile/magic_defs/hardware +12 -0
  338. polyfile/magic_defs/hitachi-sh +30 -0
  339. polyfile/magic_defs/hp +433 -0
  340. polyfile/magic_defs/human68k +26 -0
  341. polyfile/magic_defs/ibm370 +52 -0
  342. polyfile/magic_defs/ibm6000 +35 -0
  343. polyfile/magic_defs/icc +214 -0
  344. polyfile/magic_defs/iff +80 -0
  345. polyfile/magic_defs/images +4210 -0
  346. polyfile/magic_defs/inform +9 -0
  347. polyfile/magic_defs/intel +310 -0
  348. polyfile/magic_defs/interleaf +9 -0
  349. polyfile/magic_defs/island +10 -0
  350. polyfile/magic_defs/ispell +63 -0
  351. polyfile/magic_defs/isz +15 -0
  352. polyfile/magic_defs/java +52 -0
  353. polyfile/magic_defs/javascript +171 -0
  354. polyfile/magic_defs/jpeg +252 -0
  355. polyfile/magic_defs/json +8 -0
  356. polyfile/magic_defs/karma +9 -0
  357. polyfile/magic_defs/kde +11 -0
  358. polyfile/magic_defs/keepass +20 -0
  359. polyfile/magic_defs/kerberos +45 -0
  360. polyfile/magic_defs/kicad +85 -0
  361. polyfile/magic_defs/kml +34 -0
  362. polyfile/magic_defs/lammps +64 -0
  363. polyfile/magic_defs/lecter +6 -0
  364. polyfile/magic_defs/lex +12 -0
  365. polyfile/magic_defs/lif +50 -0
  366. polyfile/magic_defs/linux +557 -0
  367. polyfile/magic_defs/lisp +78 -0
  368. polyfile/magic_defs/llvm +22 -0
  369. polyfile/magic_defs/locoscript +12 -0
  370. polyfile/magic_defs/lua +31 -0
  371. polyfile/magic_defs/luks +126 -0
  372. polyfile/magic_defs/m4 +11 -0
  373. polyfile/magic_defs/mach +303 -0
  374. polyfile/magic_defs/macintosh +505 -0
  375. polyfile/magic_defs/macos +7 -0
  376. polyfile/magic_defs/magic +10 -0
  377. polyfile/magic_defs/magic.mgc +0 -0
  378. polyfile/magic_defs/mail.news +132 -0
  379. polyfile/magic_defs/make +21 -0
  380. polyfile/magic_defs/map +413 -0
  381. polyfile/magic_defs/maple +109 -0
  382. polyfile/magic_defs/marc21 +30 -0
  383. polyfile/magic_defs/mathcad +8 -0
  384. polyfile/magic_defs/mathematica +188 -0
  385. polyfile/magic_defs/matroska +17 -0
  386. polyfile/magic_defs/mcrypt +52 -0
  387. polyfile/magic_defs/measure +44 -0
  388. polyfile/magic_defs/mercurial +13 -0
  389. polyfile/magic_defs/metastore +8 -0
  390. polyfile/magic_defs/meteorological +53 -0
  391. polyfile/magic_defs/microfocus +21 -0
  392. polyfile/magic_defs/mime +9 -0
  393. polyfile/magic_defs/mips +120 -0
  394. polyfile/magic_defs/mirage +8 -0
  395. polyfile/magic_defs/misctools +140 -0
  396. polyfile/magic_defs/mkid +11 -0
  397. polyfile/magic_defs/mlssa +8 -0
  398. polyfile/magic_defs/mmdf +6 -0
  399. polyfile/magic_defs/modem +92 -0
  400. polyfile/magic_defs/modulefile +9 -0
  401. polyfile/magic_defs/motorola +71 -0
  402. polyfile/magic_defs/mozilla +37 -0
  403. polyfile/magic_defs/msdos +2304 -0
  404. polyfile/magic_defs/msooxml +68 -0
  405. polyfile/magic_defs/msvc +222 -0
  406. polyfile/magic_defs/msx +309 -0
  407. polyfile/magic_defs/mup +24 -0
  408. polyfile/magic_defs/music +17 -0
  409. polyfile/magic_defs/nasa +7 -0
  410. polyfile/magic_defs/natinst +24 -0
  411. polyfile/magic_defs/ncr +49 -0
  412. polyfile/magic_defs/neko +12 -0
  413. polyfile/magic_defs/netbsd +251 -0
  414. polyfile/magic_defs/netscape +26 -0
  415. polyfile/magic_defs/netware +11 -0
  416. polyfile/magic_defs/news +13 -0
  417. polyfile/magic_defs/nifty +202 -0
  418. polyfile/magic_defs/nim-lang +29 -0
  419. polyfile/magic_defs/nitpicker +14 -0
  420. polyfile/magic_defs/numpy +9 -0
  421. polyfile/magic_defs/oasis +12 -0
  422. polyfile/magic_defs/ocaml +14 -0
  423. polyfile/magic_defs/octave +6 -0
  424. polyfile/magic_defs/ole2compounddocs +760 -0
  425. polyfile/magic_defs/olf +98 -0
  426. polyfile/magic_defs/openfst +17 -0
  427. polyfile/magic_defs/opentimestamps +16 -0
  428. polyfile/magic_defs/oric +16 -0
  429. polyfile/magic_defs/os2 +186 -0
  430. polyfile/magic_defs/os400 +39 -0
  431. polyfile/magic_defs/os9 +80 -0
  432. polyfile/magic_defs/osf1 +10 -0
  433. polyfile/magic_defs/palm +156 -0
  434. polyfile/magic_defs/parix +13 -0
  435. polyfile/magic_defs/parrot +22 -0
  436. polyfile/magic_defs/pascal +39 -0
  437. polyfile/magic_defs/pbf +11 -0
  438. polyfile/magic_defs/pbm +8 -0
  439. polyfile/magic_defs/pc88 +24 -0
  440. polyfile/magic_defs/pc98 +77 -0
  441. polyfile/magic_defs/pci_ids +116 -0
  442. polyfile/magic_defs/pcjr +8 -0
  443. polyfile/magic_defs/pdf +51 -0
  444. polyfile/magic_defs/pdp +42 -0
  445. polyfile/magic_defs/perl +100 -0
  446. polyfile/magic_defs/pgf +52 -0
  447. polyfile/magic_defs/pgp +581 -0
  448. polyfile/magic_defs/pgp-binary-keys +388 -0
  449. polyfile/magic_defs/pkgadd +7 -0
  450. polyfile/magic_defs/plan9 +25 -0
  451. polyfile/magic_defs/playdate +57 -0
  452. polyfile/magic_defs/plus5 +18 -0
  453. polyfile/magic_defs/pmem +46 -0
  454. polyfile/magic_defs/polyfile_zip +5 -0
  455. polyfile/magic_defs/polyml +23 -0
  456. polyfile/magic_defs/printer +269 -0
  457. polyfile/magic_defs/project +10 -0
  458. polyfile/magic_defs/psdbms +14 -0
  459. polyfile/magic_defs/psl +14 -0
  460. polyfile/magic_defs/pulsar +13 -0
  461. polyfile/magic_defs/puzzle +17 -0
  462. polyfile/magic_defs/pwsafe +14 -0
  463. polyfile/magic_defs/pyramid +12 -0
  464. polyfile/magic_defs/python +305 -0
  465. polyfile/magic_defs/qt +30 -0
  466. polyfile/magic_defs/revision +66 -0
  467. polyfile/magic_defs/riff +840 -0
  468. polyfile/magic_defs/rinex +44 -0
  469. polyfile/magic_defs/ringdove +45 -0
  470. polyfile/magic_defs/rpi +52 -0
  471. polyfile/magic_defs/rpm +45 -0
  472. polyfile/magic_defs/rpmsg +7 -0
  473. polyfile/magic_defs/rst +11 -0
  474. polyfile/magic_defs/rtf +94 -0
  475. polyfile/magic_defs/ruby +55 -0
  476. polyfile/magic_defs/rust +21 -0
  477. polyfile/magic_defs/sc +7 -0
  478. polyfile/magic_defs/sccs +24 -0
  479. polyfile/magic_defs/scientific +144 -0
  480. polyfile/magic_defs/securitycerts +6 -0
  481. polyfile/magic_defs/selinux +24 -0
  482. polyfile/magic_defs/sendmail +37 -0
  483. polyfile/magic_defs/sequent +42 -0
  484. polyfile/magic_defs/sereal +35 -0
  485. polyfile/magic_defs/sgi +144 -0
  486. polyfile/magic_defs/sgml +161 -0
  487. polyfile/magic_defs/sharc +23 -0
  488. polyfile/magic_defs/sinclair +40 -0
  489. polyfile/magic_defs/sisu +18 -0
  490. polyfile/magic_defs/sketch +6 -0
  491. polyfile/magic_defs/smalltalk +25 -0
  492. polyfile/magic_defs/smile +34 -0
  493. polyfile/magic_defs/sniffer +482 -0
  494. polyfile/magic_defs/softquad +40 -0
  495. polyfile/magic_defs/sosi +40 -0
  496. polyfile/magic_defs/spec +21 -0
  497. polyfile/magic_defs/spectrum +184 -0
  498. polyfile/magic_defs/sql +288 -0
  499. polyfile/magic_defs/ssh +39 -0
  500. polyfile/magic_defs/ssl +20 -0
  501. polyfile/magic_defs/statistics +45 -0
  502. polyfile/magic_defs/subtitle +38 -0
  503. polyfile/magic_defs/sun +141 -0
  504. polyfile/magic_defs/svf +5 -0
  505. polyfile/magic_defs/sylk +36 -0
  506. polyfile/magic_defs/symbos +42 -0
  507. polyfile/magic_defs/sysex +429 -0
  508. polyfile/magic_defs/tcl +29 -0
  509. polyfile/magic_defs/teapot +6 -0
  510. polyfile/magic_defs/terminfo +63 -0
  511. polyfile/magic_defs/tex +141 -0
  512. polyfile/magic_defs/tgif +7 -0
  513. polyfile/magic_defs/ti-8x +239 -0
  514. polyfile/magic_defs/timezone +42 -0
  515. polyfile/magic_defs/tplink +95 -0
  516. polyfile/magic_defs/troff +38 -0
  517. polyfile/magic_defs/tuxedo +8 -0
  518. polyfile/magic_defs/typeset +8 -0
  519. polyfile/magic_defs/uf2 +72 -0
  520. polyfile/magic_defs/unicode +15 -0
  521. polyfile/magic_defs/unisig +12 -0
  522. polyfile/magic_defs/unknown +34 -0
  523. polyfile/magic_defs/usd +21 -0
  524. polyfile/magic_defs/uterus +16 -0
  525. polyfile/magic_defs/uuencode +28 -0
  526. polyfile/magic_defs/vacuum-cleaner +54 -0
  527. polyfile/magic_defs/varied.out +46 -0
  528. polyfile/magic_defs/varied.script +21 -0
  529. polyfile/magic_defs/vax +32 -0
  530. polyfile/magic_defs/vicar +17 -0
  531. polyfile/magic_defs/virtual +307 -0
  532. polyfile/magic_defs/virtutech +12 -0
  533. polyfile/magic_defs/visx +32 -0
  534. polyfile/magic_defs/vms +30 -0
  535. polyfile/magic_defs/vmware +6 -0
  536. polyfile/magic_defs/vorbis +155 -0
  537. polyfile/magic_defs/vxl +14 -0
  538. polyfile/magic_defs/warc +16 -0
  539. polyfile/magic_defs/weak +16 -0
  540. polyfile/magic_defs/web +18 -0
  541. polyfile/magic_defs/webassembly +17 -0
  542. polyfile/magic_defs/windows +1811 -0
  543. polyfile/magic_defs/wireless +7 -0
  544. polyfile/magic_defs/wordprocessors +630 -0
  545. polyfile/magic_defs/wsdl +23 -0
  546. polyfile/magic_defs/x68000 +25 -0
  547. polyfile/magic_defs/xdelta +13 -0
  548. polyfile/magic_defs/xenix +106 -0
  549. polyfile/magic_defs/xilinx +58 -0
  550. polyfile/magic_defs/xo65 +37 -0
  551. polyfile/magic_defs/xwindows +43 -0
  552. polyfile/magic_defs/yara +17 -0
  553. polyfile/magic_defs/zfs +96 -0
  554. polyfile/magic_defs/zilog +12 -0
  555. polyfile/magic_defs/zip +126 -0
  556. polyfile/magic_defs/zyxel +17 -0
  557. polyfile/nes.py +144 -0
  558. polyfile/nitf.py +15 -0
  559. polyfile/pdf.py +1264 -0
  560. polyfile/pickles.py +45 -0
  561. polyfile/polyfile.py +409 -0
  562. polyfile/profiling.py +115 -0
  563. polyfile/repl.py +624 -0
  564. polyfile/search.py +310 -0
  565. polyfile/serialization.py +323 -0
  566. polyfile/structmatcher.py +46 -0
  567. polyfile/structs.py +281 -0
  568. polyfile/templates/download.js +162 -0
  569. polyfile/templates/hexdump.css +268 -0
  570. polyfile/templates/hexdump.js +756 -0
  571. polyfile/templates/jquery-3.4.1.min.js +2 -0
  572. polyfile/templates/template.html +119 -0
  573. polyfile/wildcards.py +62 -0
  574. polyfile/zipmatcher.py +183 -0
  575. polyfile_weave-0.5.5.dist-info/METADATA +173 -0
  576. polyfile_weave-0.5.5.dist-info/RECORD +585 -0
  577. polyfile_weave-0.5.5.dist-info/WHEEL +5 -0
  578. polyfile_weave-0.5.5.dist-info/entry_points.txt +2 -0
  579. polyfile_weave-0.5.5.dist-info/licenses/LICENSE +202 -0
  580. polyfile_weave-0.5.5.dist-info/top_level.txt +2 -0
  581. polymerge/__init__.py +1 -0
  582. polymerge/__main__.py +296 -0
  583. polymerge/cfg.py +127 -0
  584. polymerge/polymerge.py +227 -0
  585. polymerge/polytracker.py +190 -0
@@ -0,0 +1,548 @@
1
+ import importlib
2
+ from string import whitespace
3
+ import sys
4
+
5
+ from abnf.grammars.misc import load_grammar_rules
6
+
7
+ from ..logger import getStatusLogger
8
+
if sys.version_info < (3, 11):
    # StrEnum is only available in Python 3.11 or newer, so provide a
    # minimal stand-in for older interpreters.
    from enum import Enum

    class StrEnum(str, Enum):
        """Backport of ``enum.StrEnum`` for Python versions before 3.11.

        Members are real ``str`` instances and compare equal to their values.
        """

        def __str__(self) -> str:
            # The standard-library StrEnum renders a member as its value.
            # A plain ``(str, Enum)`` mix-in would render "ClassName.MEMBER"
            # instead, so ``str()`` would differ between Python versions.
            return str.__str__(self)
else:
    from enum import StrEnum
16
+
17
+
18
+ log = getStatusLogger("HTTP/1.1")
19
+
# Loading the following ABNF grammar modules is _really_ slow, so report
# progress through the status logger while they are imported.
_GRAMMAR_MODULE_NAMES = (
    "cors",
    "rfc9110",
    "rfc5322",
    # "rfc4647",
    # "rfc5646",
    "rfc3986",
    "rfc7230",
    "rfc9111",
    "rfc6265",
)

# Collect the modules by name rather than by position, so that adding,
# removing, or reordering an entry above can never bind a grammar to the
# wrong module-level name.
_grammar_modules = {}
for modname in log.range(
    _GRAMMAR_MODULE_NAMES, desc="Importing HTTP/1.1", unit=" grammars"
):
    _grammar_modules[modname] = importlib.import_module(
        f".{modname}", package="abnf.grammars"
    )
    log.debug(f"Loaded grammar for {modname}")
del modname
log.clear_status()

# Bind each grammar module to an explicit module-level name so the names
# used below stay visible to readers and static analysis tools.
cors = _grammar_modules["cors"]
rfc9110 = _grammar_modules["rfc9110"]
rfc5322 = _grammar_modules["rfc5322"]
rfc3986 = _grammar_modules["rfc3986"]
rfc7230 = _grammar_modules["rfc7230"]
rfc9111 = _grammar_modules["rfc9111"]
rfc6265 = _grammar_modules["rfc6265"]

# Remove the temporaries so the module namespace only exposes the grammars.
del _grammar_modules
del _GRAMMAR_MODULE_NAMES
54
+
55
+ from abnf import Rule, parser, Node
56
+
57
+ from . import defacto, deprecated, experimental, structured_headers
58
+
59
+ from typing import Dict, List, Optional, Set, Tuple
60
+
61
+ # The overall goal of this parser is to take in utf-8 textual representations
62
+ # of http requests, and make sense of the headers, then make sense of the
63
+ # body/ies according to the headers.
64
+
65
+ # - TODO: support lowercase header names such as 'content-length' instead of only 'Content-Length' (currently case sensitive)
66
+ # - TODO: create a similar parser for HTTP/2 requests (requires parsing HPACK and QPACK)
67
+ # - TODO: websockets upgrade negotiation headers
68
+ # - TODO: RFC 9112 (HTTP/1.1), together with RFC 9110, obsoletes RFC 7230; the rfc7230 rules used here should be migrated accordingly
69
+
70
+ # Response headers are not included here, since this parser is intended to parse valid http/1.1 requests. If the header can be used in either a request or a response, or only in a request, it should/can be included here.
71
+ # NB: the Rule class in rfc9110.py doesn't include all of the rules needed from other specs, and it is missing HTTP headers that are used in practice but are not defined in any spec.
72
+ # NB: NodeVisitor#visit() replaces all dashes here with underscores.
# Table of (rule name, grammar module[, name of the rule inside that module]).
# The third element is only given when the grammar module's rule is named
# differently from the name it is registered under here (e.g. "From" is
# backed by the RFC 5322 "mailbox" rule). Entry order is preserved as-is.
request_rulelist: List[Tuple[str, Rule]] = [
    (alias, grammar.Rule(source_name[0] if source_name else alias))
    for alias, grammar, *source_name in (
        ("BWS", rfc9110),
        ("OWS", rfc9110),
        ("RWS", rfc9110),
        ("Accept", rfc9110),
        ("Accept-Encoding", rfc9110),
        ("Accept-Language", rfc9110),
        ("Access-Control-Request-Headers", cors),
        ("Age", rfc9111),
        ("Allow", rfc9110),
        ("Authorization", rfc9110),
        ("Cache-Control", rfc9111),
        ("Connection", rfc9110),
        ("Content-Encoding", rfc9110),
        ("Content-Language", rfc9110),
        ("Content-Length", rfc9110),
        ("Content-Location", rfc9110),
        ("Content-Range", rfc9110),
        ("Content-Type", rfc9110),
        ("Date", rfc9110),
        ("ETag", rfc9110),
        ("Expect", rfc9110),
        ("From", rfc5322, "mailbox"),
        ("HTTP-date", rfc9110),
        ("Host", rfc9110),
        ("If-Match", rfc9110),
        ("If-Modified-Since", rfc9110),
        ("If-None-Match", rfc9110),
        ("If-Range", rfc9110),
        ("If-Unmodified-Since", rfc9110),
        ("Last-Modified", rfc9110),
        ("Location", rfc9110),
        ("Max-Forwards", rfc9110),
        ("Proxy-Authenticate", rfc9110),
        ("Proxy-Authentication-Info", rfc9110),
        ("Proxy-Authorization", rfc9110),
        ("Range", rfc9110),
        ("Referer", rfc9110),
        ("Retry-After", rfc9110),
        ("TE", rfc9110),
        ("Trailer", rfc9110),
        ("Upgrade", rfc9110),
        ("User-Agent", rfc9110),
        ("Via", rfc9110),
        ("WWW-Authenticate", rfc9110),
        ("absolute-URI", rfc3986),
        ("absolute-path", rfc9110),
        ("chunked-body", rfc7230),
        ("cookie-string", rfc6265),
        ("defacto-header", defacto),
        ("deprecated-header", deprecated),
        ("experimental-header", experimental),
        ("port", rfc3986),
        ("protocol", rfc9110),
        ("protocol-name", rfc9110),
        ("protocol-version", rfc9110),
        ("query", rfc3986),
        ("quoted-string", rfc9110),
        ("sh-boolean", structured_headers),
        ("start-line", rfc7230),
        ("token", rfc9110),
        ("token68", rfc9110),
        ("transfer-coding", rfc7230),
    )
]
137
+
138
+
139
@load_grammar_rules(request_rulelist)
class Http11RequestGrammar(Rule):
    """
    An HTTP/1.1 request grammar, which is applied in the HttpVisitor below as demonstrated in the associated unit test suite.

    The `grammar` attribute is a list of ABNF rule strings. Rule names that are used but not defined in this list (OWS, token, Host, ...) are expected to come from `request_rulelist`, which is handed to the `load_grammar_rules` decorator.

    General References
    - https://http.dev/headers#http-header-categories-and-names (also includes response headers; it is VERY IMPORTANT that response headers not be defined in Http11RequestGrammar and only be defined in TODO Http11ResponseGrammar!)
    - How header fields generally get structured: https://www.rfc-editor.org/rfc/rfc7230#section-3.2
    - Also helpful: https://www.rfc-editor.org/rfc/rfc5234 which describes how ABNF for syntax specifications works
    - And https://www.rfc-editor.org/rfc/rfc9110#section-5.6.1 (pound sign definition for ABNF in e.g. Forwarded header RFC7239)
    - Structured Header rules (even fancier than RFC 7230): https://datatracker.ietf.org/doc/html/rfc8941
    """

    grammar: List[str] = [
        # https://www.rfc-editor.org/rfc/rfc7230 defines start-line and request.
        # NOTE: this rule requires at least one header line (1*), and the body is optional.
        "request = start-line 1*( header CR LF ) CR LF [ body ]",
        'request-path = absolute-path *( "?" query ) / "*"',
        # A header line is tried against each category in the order listed; unknown-or-bespoke-header is the catch-all and therefore comes last.
        "header = caching-header / deprecated-header / end-to-end-header / hop-by-hop-header / experimental-header / unknown-or-bespoke-header",
        # Added by proxies (forward and reverse) generally; mainly sourced from RFC 9110; not including response headers
        'hop-by-hop-header = "Connection:" OWS Connection OWS / "Forwarded:" OWS Forwarded OWS / "Keep-Alive:" OWS Keep-Alive OWS / "Proxy-Authenticate:" OWS Proxy-Authenticate OWS / "Proxy-Authentication-Info:" OWS Proxy-Authentication-Info OWS / "Proxy-Authorization:" OWS Proxy-Authorization OWS / "TE:" OWS TE OWS / "Trailer:" OWS Trailer OWS / "Transfer-Encoding:" OWS Transfer-Encoding OWS / "Upgrade:" OWS Upgrade OWS / "Via:" OWS Via OWS / defacto-header',
        # Mainly sourced from RFC 9110 (but also includes eg RFC 6265 for cookies, and others); not including response headers
        'end-to-end-header = "Accept:" OWS Accept OWS / "Accept-Encoding:" OWS Accept-Encoding OWS / "Accept-Language:" OWS Accept-Language OWS / "Access-Control-Request-Headers:" OWS Access-Control-Request-Headers OWS / "Access-Control-Request-Method:" OWS Access-Control-Request-Method OWS / "Authorization:" OWS Authorization OWS / "Content-Encoding:" OWS Content-Encoding OWS / "Content-Language:" OWS Content-Language OWS / "Content-Length:" OWS Content-Length OWS / "Content-Range:" OWS Content-Range OWS / "Content-Type:" OWS Content-Type OWS / "Cookie:" OWS cookie-string OWS / "Date:" OWS Date OWS / "Expect:" OWS Expect OWS / "From:" OWS From OWS / "Host:" OWS Host OWS / "If-Match:" OWS If-Match OWS / "If-Modified-Since:" OWS If-Modified-Since OWS / "If-None-Match:" OWS If-None-Match OWS / "If-Range:" OWS If-Range OWS / "If-Unmodified-Since:" OWS If-Unmodified-Since OWS / "Location:" OWS Location OWS / "Max-Forwards:" OWS Max-Forwards OWS / "Range:" OWS Range OWS / "Referer:" OWS Referer OWS / "Retry-After:" OWS Retry-After OWS / "Sec-CH-UA:" OWS Sec-CH-UA OWS / "Sec-Fetch-Dest:" OWS Sec-Fetch-Dest OWS / "Sec-Fetch-Mode:" OWS Sec-Fetch-Mode OWS / "Sec-Fetch-Site:" OWS Sec-Fetch-Site OWS / "Sec-Fetch-User:" OWS Sec-Fetch-User OWS / "Service-Worker-Navigation-Preload:" OWS Service-Worker-Navigation-Preload OWS / "Upgrade-Insecure-Requests:" OWS Upgrade-Insecure-Requests OWS / "User-Agent:" OWS User-Agent OWS / "Want-Digest:" OWS Want-Digest OWS / "WWW-Authenticate:" OWS WWW-Authenticate OWS',
        # rfc 9111 (caching) request headers follow
        'caching-header = "Age:" OWS Age OWS / "Cache-Control:" OWS Cache-Control OWS',
        # TODO kaoudis this is a placeholder for all the other headers... should be a better way to do this
        # NOTE: the value is restricted to a single token, so values containing spaces or separators will not match this fallback.
        'unknown-or-bespoke-header = token ":" OWS token OWS',
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
        "Access-Control-Request-Method = method",
        # method is defined as 'token' in rfc9110 but better to be explicit...
        # Keep this list in sync with the HttpMethod enum, which is built from the parsed method value.
        'method = "GET" / "HEAD" / "POST" / "PUT" / "PATCH" / "DELETE" / "TRACE" / "CONNECT" / "OPTIONS"',
        # https://www.rfc-editor.org/rfc/rfc7239#section-4
        'Forwarded = forwarded-element *( OWS "," OWS forwarded-element )',
        'forwarded-element = [ forwarded-pair ] *( ";" [ forwarded-pair ] )',
        'forwarded-pair = token "=" value',
        "value = token / quoted-string",
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Keep-Alive
        # https://httpwg.org/specs/rfc9112.html#compatibility.with.http.1.0.persistent.connections
        # https://www.rfc-editor.org/rfc/rfc2068.html#section-19.7.1
        'Keep-Alive = keepalive-param *( OWS "," OWS keepalive-param )',
        'keepalive-param = param-name "=" value',
        # param-name is a best guess, it doesn't seem to be explicitly defined
        "param-name = token / quoted-string",
        # https://w3c.github.io/webappsec-fetch-metadata/#sec-fetch-dest-header
        'Sec-Fetch-Dest = "audio" / "audioworklet" / "document" / "embed" / "empty" / "font" / "frame" / "iframe" / "image" / "manifest" / "object" / "paintworklet" / "report" / "script" / "serviceworker" / "sharedworker" / "style" / "track" / "video" / "worker" / "xslt"',
        # https://w3c.github.io/webappsec-fetch-metadata/#sec-fetch-mode-header
        'Sec-Fetch-Mode = "cors" / "navigate" / "no-cors" / "same-origin" / "websocket"',
        # https://w3c.github.io/webappsec-fetch-metadata/#sec-fetch-site-header
        'Sec-Fetch-Site = "cross-site" / "same-origin" / "same-site" / "none"',
        # https://w3c.github.io/webappsec-fetch-metadata/#sec-fetch-user-header
        # https://docs.w3cub.com/http/headers/sec-fetch-user.html
        "Sec-Fetch-User = sh-boolean",
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Service-Worker-Navigation-Preload
        'Service-Worker-Navigation-Preload = "true" / token / quoted-string',
        # https://www.rfc-editor.org/rfc/rfc7230#section-4 this is a better defn of transfer-encoding than the rfc9112, which is less clear. Also c.f. https://www.rfc-editor.org/rfc/rfc7230#section-4.2.3
        'Transfer-Encoding = *( "," OWS ) transfer-coding-plus-x-gzip *( OWS "," [ OWS transfer-coding-plus-x-gzip ] )',
        'transfer-coding-plus-x-gzip = transfer-coding / "x-gzip"',
        # https://w3c.github.io/webappsec-upgrade-insecure-requests/#preference
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Upgrade-Insecure-Requests
        'Upgrade-Insecure-Requests = "1"',
        # https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-digest-headers#section-4
        # https://http.dev/want-digest
        'Want-Digest = want-digest-value *( OWS "," OWS want-digest-value )',
        'want-digest-value = digest-algorithm [ ";" "q" "=" want-digest-qvalue]',
        'want-digest-qvalue = ( "0" [ "." 0*1DIGIT ] ) / ( "1" [ "." 0*1( "0" ) ] )',
        # https://www.ietf.org/archive/id/draft-ietf-httpbis-digest-headers-04.html#section-5
        # The trailing `token` alternative lets algorithm names outside the listed ones match as well.
        'digest-algorithm = "sha-256" / "sha-512" / "md5" / "sha" / "unixsum" / "unixcksum" / "id-sha-512" / "id-sha-256" / token',
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Messages
        # https://www.rfc-editor.org/rfc/rfc7230#section-3.3
        # https://www.rfc-editor.org/rfc/rfc7230#section-3.5
        # The body is matched as an undifferentiated run of octets; Content-Length / chunked framing is interpreted afterwards by HttpVisitor.
        "body = 1*OCTET",
    ]
210
+
211
+
212
class HttpMethod(StrEnum):
    """HTTP request methods; each member's value is the method token as it appears on the request line.

    HttpVisitor.visit_method builds a member from the parsed `method` node, so the
    members mirror the alternatives of the `method` rule in Http11RequestGrammar.
    """

    GET = "GET"
    HEAD = "HEAD"
    POST = "POST"
    PUT = "PUT"
    PATCH = "PATCH"
    DELETE = "DELETE"
    TRACE = "TRACE"
    CONNECT = "CONNECT"
    OPTIONS = "OPTIONS"
222
+
223
+
224
class HttpVisitor(parser.NodeVisitor):
    """Interprets information parsed into an AST using the abnf-powered
    HTTP 1.1 request grammar.

    NB: The NodeVisitor class requires a visit_Name()
    method which can be called to visit only the section(s) of the AST of interest. Add (or edit) additional visitor methods for additional AST sections of interest.

    After visiting a `request` node the instance exposes the request method and
    target, the raw header map, a few typed headers, and the decoded body
    octets in `body_parsed`. Hop-by-hop headers (`connection`, `te`,
    `transfer_encoding`, `trailer`, ...) are only set as attributes when the
    header was present, which is why the body handling tests them with
    `hasattr`.
    """

    # Hex digits other than "0". A chunk-size line that starts with one of these
    # announces a non-empty chunk; "0" is treated separately because a size of
    # zero marks the last chunk.
    _hex_without_zero = list("123456789abcdefABCDEF")

    # Grammar node name -> attribute that stores the header value.
    _hop_by_hop_attributes = {
        "Connection": "connection",
        "Forwarded": "forwarded",
        "Keep-Alive": "keep_alive",
        "Proxy-Authenticate": "proxy_authenticate",
        "Proxy-Authentication-Info": "proxy_authentication_info",
        "Proxy-Authorization": "proxy_authorization",
        "TE": "te",
        "Trailer": "trailer",
        "Transfer-Encoding": "transfer_encoding",
        "Upgrade": "upgrade",
        "Via": "via",
    }

    # start-line
    method: HttpMethod
    request_target: str

    # unprocessed headers (global); created lazily per instance
    headers: Optional[Dict[str, str]] = None

    # headers (specific)
    content_type: Optional[str] = None
    content_length: Optional[int] = None
    host: Optional[str] = None

    # body; body_parsed is created lazily per instance
    body_raw: Optional[str] = None
    body_parsed: Optional[List[str]] = None

    def __init__(self):
        super().__init__()

    def _remove_header(self, name: str) -> None:
        """Required by RFC to support Transfer-Encoding and TE headers. Once the transfer encoding no longer applies to the message, it must be removed.

        Does nothing when no headers were collected or `name` is absent.
        """
        if self.headers is not None:
            self.headers.pop(name, None)

    def visit_request(self, node: Node):
        """Record every header line and the raw body, then visit each child."""
        for child in node.children:
            if child.name == "header":
                if self.headers is None:
                    # The class-level default is None, so the dict has to be
                    # created here; a class-level dict would be shared by all
                    # instances.
                    self.headers = dict()
                header_name, header_value = child.value.strip(whitespace).split(":", 1)
                self.headers[header_name] = header_value.strip(whitespace)
            if child.name == "body":
                self.body_raw = child.value
                if self.body_parsed is None:
                    self.body_parsed = list()

            self.visit(child)

    def visit_start_line(self, node: Node):
        """Descend into the start-line so the method and target get visited."""
        for child in node.children:
            self.visit(child)

    def visit_request_line(self, node: Node):
        """Descend into the request-line so the method and target get visited."""
        for child in node.children:
            self.visit(child)

    def visit_method(self, node: Node):
        """Store the request method; raises ValueError for a value outside HttpMethod."""
        self.method = HttpMethod(node.value)

    def visit_request_target(self, node: Node):
        """Store the request target exactly as it was parsed."""
        self.request_target = node.value

    def visit_header(self, node: Node):
        """Descend into a header node so the category visitors below run."""
        for child in node.children:
            self.visit(child)

    def visit_end_to_end_header(self, node: Node):
        """Capture Content-Length (as int), Content-Type and Host, then visit each child."""
        for child in node.children:
            if child.name == "Content-Length":
                self.content_length = int(child.value)
            elif child.name == "Content-Type":
                self.content_type = child.value
            elif child.name == "Host":
                self.host = child.value

            self.visit(child)

    def visit_hop_by_hop_header(self, node: Node):
        """RFC2616 and RFC7230 hop-by-hop headers.

        Each recognised header value is stored on the attribute named in
        `_hop_by_hop_attributes`; other children are only visited.
        """
        for child in node.children:
            attribute = self._hop_by_hop_attributes.get(child.name)
            if attribute is not None:
                setattr(self, attribute, child.value)

            self.visit(child)

    def visit_body(self, node: Node):
        """Decode the body according to Content-Length or chunked transfer coding.

        With a positive Content-Length and neither Transfer-Encoding nor TE
        present, the first `content_length` OCTET children are appended to
        `body_parsed`. If either header equals "chunked" the body is handed to
        `visit_chunked_body`. Any other combination leaves the body unparsed.
        """
        has_transfer_coding = hasattr(self, "transfer_encoding") or hasattr(self, "te")
        if (
            self.content_length is not None
            and self.content_length > 0
            and not has_transfer_coding
        ):
            if self.body_parsed is None:
                self.body_parsed = []
            octet_counter = self.content_length
            for child in node.children:
                # Append octets up to content-length if the body is not chunked encoding; the rest are still in the AST and may refer to various kinds of trailer(s) but should be ignored for purposes of content-length based body processing.
                if octet_counter > 0 and child.name == "OCTET":
                    octet_counter -= 1
                    self.body_parsed.append(child.value)

                self.visit(child)
        elif (
            getattr(self, "transfer_encoding", None) == "chunked"
            or getattr(self, "te", None) == "chunked"
        ):
            # this is a hack following the rfc parsing definition
            # c.f. https://www.rfc-editor.org/rfc/rfc7230#section-4.1.3
            # since abnf parses everything all at once and I really want
            # it to be incremental to more sensibly follow the rfc defn
            self.visit_chunked_body(node)

    def _chunk_size(self, node_children: List[Node]) -> Tuple[int, int]:
        """Read the hexadecimal chunk-size at the start of `node_children`.

        There is no max chunk size defined in spec, so hex digits are consumed
        until the first non-hex character.

        Returns:
            `(chunk_size, index)` where `index` is the position of the first
            child that is not part of the size: the ";" that starts a chunk
            extension, or the CR of the CR LF that ends the size line. When the
            digits are followed by anything else, or there are no digits at
            all, the size is reported as 0. If the children run out while still
            reading digits, `index` equals `len(node_children)`.
        """
        digits: List[str] = []
        for index, child in enumerate(node_children):
            value = child.value
            # https://stackoverflow.com/a/7058854
            if value == "0" or value in self._hex_without_zero:
                # "0" is an ordinary digit here, so sizes such as "10" or "a0"
                # are read in full rather than cut off at the zero.
                digits.append(value)
                continue
            if digits and value in ("\r", ";"):
                return (int("".join(digits), 16), index)
            return (0, index)
        return (int("".join(digits), 16) if digits else 0, len(node_children))

    def _accumulate_chunk_extensions(
        self, node_children: List[Node], starting_index: int
    ) -> Tuple[List[str], int]:
        """Returns the chunk extensions and the first index after the following CRLF.

        Characters from `starting_index` up to the CR LF are collected verbatim
        (a lone CR is skipped). If no CR LF follows, the returned index is
        `len(node_children)`.
        TODO kaoudis: handle chunk extensions more better according to RFC."""
        chunk_ext: List[str] = []
        total = len(node_children)
        for index in range(starting_index, total):
            value = node_children[index].value
            if value != "\r":
                chunk_ext.append(value)
            elif index + 1 < total and node_children[index + 1].value == "\n":
                return (chunk_ext, index + 2)
        return (chunk_ext, total)

    def _accumulate_trailers(self, node_children: List[Node]):
        """
        Following https://www.rfc-editor.org/rfc/rfc7230#section-4.4,
        we use the Trailer header to figure out what will be in the chunked transfer coding trailer(s) and add these additional headers to the Visitor instance's header list.

        See also: https://www.rfc-editor.org/rfc/rfc7230#section-4.1.2, https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Trailer

        The trailer fields are identical to header fields, except
        they are sent in a chunked trailer instead of the message's header
        section.

        A sender MUST NOT generate a trailer that contains a field necessary
        for message framing (e.g., Transfer-Encoding and Content-Length),
        routing (e.g., Host), request modifiers (e.g., controls and
        conditionals in Section 5 of [RFC7231]), authentication (e.g., see
        [RFC7235] and [RFC6265]), response control data (e.g., see Section
        7.1 of [RFC7231]), or determining how to process the payload (e.g.,
        Content-Encoding, Content-Type, Content-Range, and Trailer).

        When a chunked message containing a non-empty trailer is received,
        the recipient MAY process the fields (aside from those forbidden
        above) as if they were appended to the message's header section. A
        recipient MUST ignore (or consider as an error) any fields that are
        forbidden to be sent in a trailer, since processing them as if they
        were present in the header section might bypass external security
        filters.

        Only fields announced in the Trailer header and not forbidden above are
        kept. Each kept field is stored in `headers` and also set as an
        attribute named after the lower-cased field name. Trailer lines without
        a colon are ignored. Does nothing when no Trailer header was seen.
        """
        if not hasattr(self, "trailer"):
            return

        disallowed_fields: Set = {
            "Transfer-Encoding",
            "Content-Length",
            "Host",
            "Cache-Control",
            "Max-Forwards",
            "TE",
            "Authorization",
            "Set-Cookie",
            "Content-Encoding",
            "Content-Type",
            "Content-Range",
            "Trailer",
        }

        # "Trailer: A, B" lists names separated by commas with optional
        # whitespace, so each name is stripped before comparing.
        trailer_field_names: Set = {
            field.strip(whitespace) for field in self.trailer.split(",")
        }
        allowed_trailer_field_names: Set = trailer_field_names - disallowed_fields

        raw_trailers = "".join(node.value for node in node_children)
        trailer_headers: List[str] = list(filter(None, raw_trailers.split("\r\n")))

        if self.headers is None:
            self.headers = dict()

        for header in trailer_headers:
            name, separator, value = header.strip(whitespace).partition(":")
            if not separator:
                # not a "name: value" line; nothing to record
                continue
            if name in allowed_trailer_field_names:
                # A recipient that retains a received trailer field MUST
                # either store/forward the trailer field separately from
                # the received header fields or merge the received trailer
                # field into the header section. A recipient MUST NOT merge
                # a received trailer field into the header section unless
                # its corresponding header field definition explicitly
                # permits and instructs how the trailer field value can be
                # safely merged.
                # NOTE(review): the name comparison is case-sensitive.
                stripped_value = value.strip(whitespace)
                self.headers[name] = stripped_value
                setattr(self, name.lower(), stripped_value)

    def _accumulate_chunks(self, node_children: List[Node], length: int = 0) -> int:
        """Decode chunked data starting at a chunk-size line.

        A fairly close interpretation of the chunk accumulation algorithm from
        rfc7230 and rfc9112: read chunk-size, skip chunk-ext (if any) and CR LF,
        append `chunk-size` octets to `body_parsed`, then recurse on the next
        chunk. A zero-size chunk ends the data; trailers are processed only if
        a Trailer header was seen.

        Args:
            node_children: body octet nodes, beginning with a chunk-size.
            length: decoded length accumulated by earlier chunks.

        Returns:
            `length` plus the sizes of every chunk decoded from
            `node_children`.
        """
        total = len(node_children)

        # read chunk-size, chunk-ext (if any), and CR LF
        (chunk_size, next_index) = self._chunk_size(node_children)
        length += chunk_size

        if chunk_size == 0 and not hasattr(self, "trailer"):
            # chunk size is 0 and no trailer, so we are done
            return length

        # https://www.rfc-editor.org/rfc/rfc7230#section-4.1.1
        # a recipient must ignore unrecognized chunk-extensions!
        if next_index < total and node_children[next_index].value == ";":
            (self.chunk_ext, starting_index) = self._accumulate_chunk_extensions(
                node_children, next_index
            )
        else:
            # no chunk-ext: skip the CR LF that ends the chunk-size line
            starting_index = next_index + 2

        if chunk_size == 0:
            # last chunk came first; whatever follows its CR LF is the trailer
            self._accumulate_trailers(node_children[starting_index:])
            return length

        remaining = chunk_size
        for index in range(starting_index, total):
            child = node_children[index]

            if remaining > 0:
                # just blindly trust the number lol
                self.body_parsed.append(child.value)
                remaining -= 1
            elif (
                index + 3 <= total
                and child.value == "\r"
                and node_children[index + 1].value == "\n"
                and node_children[index + 2].value in self._hex_without_zero
            ):
                # If not 0 and all octets are accounted for, the next thing should be the next chunk size. Keep adding data to body_parsed.
                # The recursive call receives the running total and returns
                # the new one, so it replaces `length` rather than adding to it.
                length = self._accumulate_chunks(node_children[index + 2 :], length)
                self.visit(child)
                return length
            elif (
                # hasattr 'trailer' means there was a Trailer header
                hasattr(self, "trailer")
                # '\r\n0\r\n' where our current index is the first '\r'
                and index + 5 <= total
                and child.value == "\r"
                # ending chunks w/ '0\r\n'
                and node_children[index + 2].value == "0"
            ):
                self._accumulate_trailers(node_children[index + 5 :])
                return length
            else:
                return length

            self.visit(child)

        # ran out of input while (or right after) reading chunk data
        return length

    def visit_chunked_body(self, node: Node):
        """Defined in RFC 7230 Sec. 4.1.3
        c.f. https://www.rfc-editor.org/rfc/rfc7230#section-4.1.3 and repeated! in https://www.rfc-editor.org/rfc/rfc9112#section-7.1.3

        If a message is received with both a Transfer-Encoding and a
        Content-Length header field, the Transfer-Encoding overrides the
        Content-Length. Such a message might indicate an attempt to
        perform request smuggling (Section 9.5) or response splitting
        (Section 9.4) and ought to be handled as an error. A sender MUST
        remove the received Content-Length field prior to forwarding such
        a message downstream.

        Sets `content_length` to the decoded length and drops the
        Transfer-Encoding entry from `headers`.
        """
        if self.body_parsed is None:
            self.body_parsed = []
        self.content_length = self._accumulate_chunks(node_children=node.children)
        self._remove_header("Transfer-Encoding")
@@ -0,0 +1,37 @@
1
+ from pathlib import Path
2
+
3
+ from ..ast import Node as ASTNode
4
+ from ..fileutils import ExactNamedTempfile, FileStream
5
+ from ..magic import MagicMatcher
6
+ from ..polyfile import register_parser, Match
7
+
8
+
9
# Cache for the grammar class. It stays None until parse_http_11 imports
# .http_11 on first use and stores the class here.
Http11RequestGrammar = None

# MIME type strings for HTTP matches; the 1.1 variant appends a version parameter.
HTTP_MIME_TYPE: str = "message/x-http"
HTTP_11_MIME_TYPE: str = f"{HTTP_MIME_TYPE}; version=1.1"


# Register a magic matcher for HTTP 1.1 headers:
# The definition below is passed as bytes to ExactNamedTempfile (presumably
# written to a temporary file with the given name - TODO confirm) and the
# resulting path is added to the default MagicMatcher instance. The regex test
# carries the HTTP 1.1 MIME type; the three ">0 string" lines label GET, POST
# and PUT requests.
with ExactNamedTempfile(b"""0 regex/s [^\\\\n]*?\\\\s+HTTP/1.1\\\\s*$ HTTP 1.1
!:mime """ + HTTP_11_MIME_TYPE.encode("utf-8") + b"""
>0 string GET GET request header
>0 string POST POST request header
>0 string PUT PUT request header
""", name="HTTP1.1Matcher") as t:
    # Only the first element of what add() returns is kept.
    http_11_matcher = MagicMatcher.DEFAULT_INSTANCE.add(Path(t))[0]
23
+
24
+
25
@register_parser(HTTP_11_MIME_TYPE)
def parse_http_11(file_stream: FileStream, parent: Match):
    """Parse an HTTP/1.1 request from `file_stream` and yield its matches.

    The stream position on entry is remembered and passed to the grammar as the
    parse start; the stream is then rewound and read in full as UTF-8. The
    resulting AST is loaded into an ASTNode and converted to matches beneath
    `parent`.
    """
    global Http11RequestGrammar

    start_offset = file_stream.tell()
    file_stream.seek(0)

    if Http11RequestGrammar is None:
        # the http_11 module takes a _really_ long time to load/parse the grammar, so do this lazily
        from .http_11 import Http11RequestGrammar as Grammar

        Http11RequestGrammar = Grammar

    request_rule = Http11RequestGrammar("request")
    http_ast, _ = request_rule.parse(
        file_stream.read().decode("utf-8"), start=start_offset
    )
    yield from ASTNode.load(http_ast).to_matches(parent)
@@ -0,0 +1,48 @@
1
+ from abnf.grammars.misc import load_grammar_rules
2
+ from abnf.grammars import rfc9110, rfc3986
3
+ from abnf import Rule as _Rule
4
+ from typing import List, Tuple
5
+
6
# Rules referenced by the grammar below but defined in RFC 9110 rather than
# here. They are handed to load_grammar_rules so those references resolve.
rulelist: List[Tuple[str, _Rule]] = [
    ("OWS", rfc9110.Rule("OWS")),
    # sh-token is defined in terms of tchar, which this grammar does not define itself.
    ("tchar", rfc9110.Rule("tchar")),
]
9
+
10
+
11
@load_grammar_rules(rulelist)
class Rule(_Rule):
    """
    ABNF rules for RFC 8941 Structured Field Values (items, lists, inner lists, dictionaries and parameters).

    Several headers that Mozilla considers 'experimental' rely on ABNF constructions from RFC 8941: Structured Headers.

    https://datatracker.ietf.org/doc/html/rfc8941
    Also possibly of note: https://datatracker.ietf.org/doc/html/rfc8941#section-4.2
    """

    grammar: List[str] = [
        'sh-list = list-member *( OWS "," OWS list-member )',
        "list-member = sh-item / inner-list",
        # RFC 8941 section 3.1.1: an inner list is followed by `parameters`, i.e. zero or more ";"-prefixed parameter entries.
        'inner-list = "(" *SP [ sh-item *( 1*SP sh-item ) *SP ] ")" parameters',
        "sh-item = bare-item parameters",
        "bare-item = sh-integer / sh-decimal / sh-string / sh-token / sh-binary / sh-boolean",
        'sh-integer = ["-"] 1*15DIGIT',
        'sh-decimal = ["-"] 1*12DIGIT "." 1*3DIGIT',
        "sh-string = DQUOTE *chr DQUOTE",
        "chr = unescaped / escaped",
        "unescaped = %x20-21 / %x23-5B / %x5D-7E",
        'escaped = "\\" ( DQUOTE / "\\" )',
        'sh-token = ( ALPHA / "*" ) *( tchar / ":" / "/" )',
        'sh-binary = ":" *(base64) ":"',
        'base64 = ALPHA / DIGIT / "+" / "/" / "="',
        'sh-boolean = "?" boolean',
        'boolean = "0" / "1"',
        # Note that parameters are ordered as serialized, and parameter keys cannot contain uppercase letters. A parameter is separated from its Item or Inner List and other parameters by a semicolon.
        'parameters = *( ";" *SP parameter )',
        'parameter = param-key [ "=" param-value ]',
        "param-key = key",
        'key = ( lcalpha / "*" ) *( lcalpha / DIGIT / "_" / "-" / "." / "*" )',
        "lcalpha = %x61-7A ; a-z",
        "param-value = bare-item",
        'sh-dictionary = dict-member *( OWS "," OWS dict-member )',
        'dict-member = member-key ( parameters / ( "=" member-value ))',
        "member-key = key",
        "member-value = sh-item / inner-list",
    ]