polyfile-weave 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polyfile-weave might be problematic. Click here for more details.
- polyfile/__init__.py +15 -0
- polyfile/__main__.py +394 -0
- polyfile/arithmetic.py +27 -0
- polyfile/ast.py +114 -0
- polyfile/debugger.py +1039 -0
- polyfile/expressions.py +346 -0
- polyfile/fileutils.py +343 -0
- polyfile/html.py +135 -0
- polyfile/http/__init__.py +1 -0
- polyfile/http/defacto.py +37 -0
- polyfile/http/deprecated.py +51 -0
- polyfile/http/experimental.py +67 -0
- polyfile/http/http_11.py +548 -0
- polyfile/http/matcher.py +37 -0
- polyfile/http/structured_headers.py +48 -0
- polyfile/iterators.py +72 -0
- polyfile/jpeg.py +24 -0
- polyfile/kaitai/__init__.py +0 -0
- polyfile/kaitai/compiler.py +156 -0
- polyfile/kaitai/parser.py +312 -0
- polyfile/kaitai/parsers/__init__.py +0 -0
- polyfile/kaitai/parsers/aix_utmp.py +116 -0
- polyfile/kaitai/parsers/allegro_dat.py +367 -0
- polyfile/kaitai/parsers/andes_firmware.py +64 -0
- polyfile/kaitai/parsers/android_bootldr_asus.py +105 -0
- polyfile/kaitai/parsers/android_bootldr_huawei.py +181 -0
- polyfile/kaitai/parsers/android_bootldr_qcom.py +217 -0
- polyfile/kaitai/parsers/android_dto.py +138 -0
- polyfile/kaitai/parsers/android_img.py +319 -0
- polyfile/kaitai/parsers/android_nanoapp_header.py +83 -0
- polyfile/kaitai/parsers/android_opengl_shaders_cache.py +151 -0
- polyfile/kaitai/parsers/android_sparse.py +237 -0
- polyfile/kaitai/parsers/android_super.py +401 -0
- polyfile/kaitai/parsers/apm_partition_table.py +196 -0
- polyfile/kaitai/parsers/apple_single_double.py +180 -0
- polyfile/kaitai/parsers/asn1_der.py +235 -0
- polyfile/kaitai/parsers/au.py +138 -0
- polyfile/kaitai/parsers/avantes_roh60.py +112 -0
- polyfile/kaitai/parsers/avi.py +296 -0
- polyfile/kaitai/parsers/bcd.py +111 -0
- polyfile/kaitai/parsers/bitcoin_transaction.py +210 -0
- polyfile/kaitai/parsers/blender_blend.py +334 -0
- polyfile/kaitai/parsers/bmp.py +780 -0
- polyfile/kaitai/parsers/bson.py +411 -0
- polyfile/kaitai/parsers/btrfs_stream.py +318 -0
- polyfile/kaitai/parsers/bytes_with_io.py +27 -0
- polyfile/kaitai/parsers/chrome_pak.py +194 -0
- polyfile/kaitai/parsers/code_6502.py +456 -0
- polyfile/kaitai/parsers/compressed_resource.py +217 -0
- polyfile/kaitai/parsers/cpio_old_le.py +154 -0
- polyfile/kaitai/parsers/cramfs.py +344 -0
- polyfile/kaitai/parsers/creative_voice_file.py +342 -0
- polyfile/kaitai/parsers/dbf.py +274 -0
- polyfile/kaitai/parsers/dcmp_0.py +664 -0
- polyfile/kaitai/parsers/dcmp_1.py +422 -0
- polyfile/kaitai/parsers/dcmp_2.py +312 -0
- polyfile/kaitai/parsers/dcmp_variable_length_integer.py +66 -0
- polyfile/kaitai/parsers/dex.py +1086 -0
- polyfile/kaitai/parsers/dicom.py +4370 -0
- polyfile/kaitai/parsers/dime_message.py +201 -0
- polyfile/kaitai/parsers/dns_packet.py +569 -0
- polyfile/kaitai/parsers/doom_wad.py +654 -0
- polyfile/kaitai/parsers/dos_datetime.py +191 -0
- polyfile/kaitai/parsers/dos_mz.py +172 -0
- polyfile/kaitai/parsers/ds_store.py +513 -0
- polyfile/kaitai/parsers/dtb.py +310 -0
- polyfile/kaitai/parsers/dune_2_pak.py +126 -0
- polyfile/kaitai/parsers/edid.py +472 -0
- polyfile/kaitai/parsers/efivar_signature_list.py +331 -0
- polyfile/kaitai/parsers/elf.py +2482 -0
- polyfile/kaitai/parsers/ethernet_frame.py +114 -0
- polyfile/kaitai/parsers/exif.py +723 -0
- polyfile/kaitai/parsers/ext2.py +537 -0
- polyfile/kaitai/parsers/fallout2_dat.py +187 -0
- polyfile/kaitai/parsers/fallout_dat.py +156 -0
- polyfile/kaitai/parsers/fasttracker_xm_module.py +558 -0
- polyfile/kaitai/parsers/ftl_dat.py +90 -0
- polyfile/kaitai/parsers/genmidi_op2.py +161 -0
- polyfile/kaitai/parsers/gettext_mo.py +541 -0
- polyfile/kaitai/parsers/gif.py +492 -0
- polyfile/kaitai/parsers/gimp_brush.py +244 -0
- polyfile/kaitai/parsers/glibc_utmp.py +114 -0
- polyfile/kaitai/parsers/gltf_binary.py +132 -0
- polyfile/kaitai/parsers/google_protobuf.py +151 -0
- polyfile/kaitai/parsers/gpt_partition_table.py +175 -0
- polyfile/kaitai/parsers/gran_turismo_vol.py +140 -0
- polyfile/kaitai/parsers/grub2_font.py +337 -0
- polyfile/kaitai/parsers/gzip.py +232 -0
- polyfile/kaitai/parsers/hashcat_restore.py +60 -0
- polyfile/kaitai/parsers/hccap.py +111 -0
- polyfile/kaitai/parsers/hccapx.py +103 -0
- polyfile/kaitai/parsers/heaps_pak.py +177 -0
- polyfile/kaitai/parsers/heroes_of_might_and_magic_agg.py +116 -0
- polyfile/kaitai/parsers/heroes_of_might_and_magic_bmp.py +34 -0
- polyfile/kaitai/parsers/icmp_packet.py +136 -0
- polyfile/kaitai/parsers/ico.py +129 -0
- polyfile/kaitai/parsers/id3v1_1.py +220 -0
- polyfile/kaitai/parsers/id3v2_3.py +324 -0
- polyfile/kaitai/parsers/id3v2_4.py +423 -0
- polyfile/kaitai/parsers/ines.py +282 -0
- polyfile/kaitai/parsers/ipv4_packet.py +158 -0
- polyfile/kaitai/parsers/ipv6_packet.py +55 -0
- polyfile/kaitai/parsers/iso9660.py +544 -0
- polyfile/kaitai/parsers/java_class.py +1113 -0
- polyfile/kaitai/parsers/jpeg.py +361 -0
- polyfile/kaitai/parsers/luks.py +149 -0
- polyfile/kaitai/parsers/lzh.py +165 -0
- polyfile/kaitai/parsers/mac_os_resource_snd.py +493 -0
- polyfile/kaitai/parsers/mach_o.py +3033 -0
- polyfile/kaitai/parsers/mach_o_fat.py +92 -0
- polyfile/kaitai/parsers/magicavoxel_vox.py +391 -0
- polyfile/kaitai/parsers/manifest.json +1 -0
- polyfile/kaitai/parsers/mbr_partition_table.py +119 -0
- polyfile/kaitai/parsers/mcap.py +1015 -0
- polyfile/kaitai/parsers/microsoft_cfb.py +293 -0
- polyfile/kaitai/parsers/microsoft_network_monitor_v2.py +309 -0
- polyfile/kaitai/parsers/microsoft_pe.py +765 -0
- polyfile/kaitai/parsers/mifare_classic.py +706 -0
- polyfile/kaitai/parsers/minecraft_nbt.py +449 -0
- polyfile/kaitai/parsers/monomakh_sapr_chg.py +69 -0
- polyfile/kaitai/parsers/mozilla_mar.py +239 -0
- polyfile/kaitai/parsers/mp4.py +333 -0
- polyfile/kaitai/parsers/msgpack.py +467 -0
- polyfile/kaitai/parsers/nitf.py +1189 -0
- polyfile/kaitai/parsers/nt_mdt_pal.py +155 -0
- polyfile/kaitai/parsers/ogg.py +118 -0
- polyfile/kaitai/parsers/openpgp_message.py +993 -0
- polyfile/kaitai/parsers/packet_ppi.py +515 -0
- polyfile/kaitai/parsers/pcap.py +344 -0
- polyfile/kaitai/parsers/pcf_font.py +506 -0
- polyfile/kaitai/parsers/pcx.py +195 -0
- polyfile/kaitai/parsers/pcx_dcx.py +79 -0
- polyfile/kaitai/parsers/phar_without_stub.py +399 -0
- polyfile/kaitai/parsers/php_serialized_value.py +505 -0
- polyfile/kaitai/parsers/png.py +721 -0
- polyfile/kaitai/parsers/protocol_body.py +260 -0
- polyfile/kaitai/parsers/psx_tim.py +104 -0
- polyfile/kaitai/parsers/python_pickle.py +718 -0
- polyfile/kaitai/parsers/python_pyc_27.py +510 -0
- polyfile/kaitai/parsers/quake_mdl.py +441 -0
- polyfile/kaitai/parsers/quake_pak.py +112 -0
- polyfile/kaitai/parsers/quicktime_mov.py +634 -0
- polyfile/kaitai/parsers/rar.py +265 -0
- polyfile/kaitai/parsers/regf.py +569 -0
- polyfile/kaitai/parsers/renderware_binary_stream.py +877 -0
- polyfile/kaitai/parsers/resource_fork.py +611 -0
- polyfile/kaitai/parsers/respack.py +57 -0
- polyfile/kaitai/parsers/riff.py +409 -0
- polyfile/kaitai/parsers/rpm.py +964 -0
- polyfile/kaitai/parsers/rtcp_payload.py +579 -0
- polyfile/kaitai/parsers/rtp_packet.py +150 -0
- polyfile/kaitai/parsers/rtpdump.py +115 -0
- polyfile/kaitai/parsers/ruby_marshal.py +423 -0
- polyfile/kaitai/parsers/s3m.py +493 -0
- polyfile/kaitai/parsers/saints_row_2_vpp_pc.py +254 -0
- polyfile/kaitai/parsers/shapefile_index.py +174 -0
- polyfile/kaitai/parsers/shapefile_main.py +893 -0
- polyfile/kaitai/parsers/some_ip.py +209 -0
- polyfile/kaitai/parsers/some_ip_container.py +37 -0
- polyfile/kaitai/parsers/some_ip_sd.py +86 -0
- polyfile/kaitai/parsers/some_ip_sd_entries.py +160 -0
- polyfile/kaitai/parsers/some_ip_sd_options.py +374 -0
- polyfile/kaitai/parsers/specpr.py +404 -0
- polyfile/kaitai/parsers/sqlite3.py +472 -0
- polyfile/kaitai/parsers/ssh_public_key.py +252 -0
- polyfile/kaitai/parsers/standard_midi_file.py +390 -0
- polyfile/kaitai/parsers/stl.py +111 -0
- polyfile/kaitai/parsers/sudoers_ts.py +201 -0
- polyfile/kaitai/parsers/swf.py +406 -0
- polyfile/kaitai/parsers/systemd_journal.py +361 -0
- polyfile/kaitai/parsers/tcp_segment.py +57 -0
- polyfile/kaitai/parsers/tga.py +213 -0
- polyfile/kaitai/parsers/tls_client_hello.py +293 -0
- polyfile/kaitai/parsers/tr_dos_image.py +322 -0
- polyfile/kaitai/parsers/tsm.py +198 -0
- polyfile/kaitai/parsers/ttf.py +1847 -0
- polyfile/kaitai/parsers/udp_datagram.py +42 -0
- polyfile/kaitai/parsers/uefi_te.py +236 -0
- polyfile/kaitai/parsers/uimage.py +198 -0
- polyfile/kaitai/parsers/utf8_string.py +137 -0
- polyfile/kaitai/parsers/vfat.py +410 -0
- polyfile/kaitai/parsers/vlq_base128_be.py +104 -0
- polyfile/kaitai/parsers/vlq_base128_le.py +129 -0
- polyfile/kaitai/parsers/vmware_vmdk.py +167 -0
- polyfile/kaitai/parsers/vp8_ivf.py +112 -0
- polyfile/kaitai/parsers/warcraft_2_pud.py +423 -0
- polyfile/kaitai/parsers/wav.py +1014 -0
- polyfile/kaitai/parsers/websocket.py +167 -0
- polyfile/kaitai/parsers/windows_evt_log.py +304 -0
- polyfile/kaitai/parsers/windows_lnk_file.py +467 -0
- polyfile/kaitai/parsers/windows_minidump.py +575 -0
- polyfile/kaitai/parsers/windows_resource_file.py +243 -0
- polyfile/kaitai/parsers/windows_shell_items.py +190 -0
- polyfile/kaitai/parsers/windows_systemtime.py +52 -0
- polyfile/kaitai/parsers/wmf.py +502 -0
- polyfile/kaitai/parsers/xar.py +181 -0
- polyfile/kaitai/parsers/xwd.py +189 -0
- polyfile/kaitai/parsers/zip.py +685 -0
- polyfile/kaitai/parsers/zisofs.py +158 -0
- polyfile/kaitai/parsers/zx_spectrum_tap.py +184 -0
- polyfile/kaitaimatcher.py +113 -0
- polyfile/languagematcher.py +217 -0
- polyfile/logger.py +135 -0
- polyfile/magic.py +2983 -0
- polyfile/magic_defs/COPYING +29 -0
- polyfile/magic_defs/__init__.py +0 -0
- polyfile/magic_defs/acorn +102 -0
- polyfile/magic_defs/adi +13 -0
- polyfile/magic_defs/adventure +122 -0
- polyfile/magic_defs/aes +29 -0
- polyfile/magic_defs/algol68 +35 -0
- polyfile/magic_defs/allegro +9 -0
- polyfile/magic_defs/alliant +18 -0
- polyfile/magic_defs/alpha +32 -0
- polyfile/magic_defs/amanda +12 -0
- polyfile/magic_defs/amigaos +218 -0
- polyfile/magic_defs/android +259 -0
- polyfile/magic_defs/animation +1197 -0
- polyfile/magic_defs/aout +46 -0
- polyfile/magic_defs/apache +28 -0
- polyfile/magic_defs/apl +7 -0
- polyfile/magic_defs/apple +773 -0
- polyfile/magic_defs/application +7 -0
- polyfile/magic_defs/applix +13 -0
- polyfile/magic_defs/apt +52 -0
- polyfile/magic_defs/archive +2586 -0
- polyfile/magic_defs/aria +38 -0
- polyfile/magic_defs/arm +50 -0
- polyfile/magic_defs/asf +132 -0
- polyfile/magic_defs/assembler +18 -0
- polyfile/magic_defs/asterix +18 -0
- polyfile/magic_defs/att3b +41 -0
- polyfile/magic_defs/audio +1291 -0
- polyfile/magic_defs/avm +33 -0
- polyfile/magic_defs/basis +18 -0
- polyfile/magic_defs/beetle +7 -0
- polyfile/magic_defs/ber +65 -0
- polyfile/magic_defs/bflt +14 -0
- polyfile/magic_defs/bhl +10 -0
- polyfile/magic_defs/bioinformatics +178 -0
- polyfile/magic_defs/biosig +154 -0
- polyfile/magic_defs/blackberry +8 -0
- polyfile/magic_defs/blcr +25 -0
- polyfile/magic_defs/blender +50 -0
- polyfile/magic_defs/blit +24 -0
- polyfile/magic_defs/bm +10 -0
- polyfile/magic_defs/bout +11 -0
- polyfile/magic_defs/bsdi +33 -0
- polyfile/magic_defs/bsi +10 -0
- polyfile/magic_defs/btsnoop +13 -0
- polyfile/magic_defs/burp +7 -0
- polyfile/magic_defs/bytecode +41 -0
- polyfile/magic_defs/c-lang +110 -0
- polyfile/magic_defs/c64 +531 -0
- polyfile/magic_defs/cad +437 -0
- polyfile/magic_defs/cafebabe +107 -0
- polyfile/magic_defs/cbor +21 -0
- polyfile/magic_defs/ccf +14 -0
- polyfile/magic_defs/cddb +12 -0
- polyfile/magic_defs/chord +15 -0
- polyfile/magic_defs/cisco +12 -0
- polyfile/magic_defs/citrus +12 -0
- polyfile/magic_defs/clarion +27 -0
- polyfile/magic_defs/claris +48 -0
- polyfile/magic_defs/clipper +65 -0
- polyfile/magic_defs/clojure +30 -0
- polyfile/magic_defs/coff +98 -0
- polyfile/magic_defs/commands +201 -0
- polyfile/magic_defs/communications +22 -0
- polyfile/magic_defs/compress +461 -0
- polyfile/magic_defs/console +1213 -0
- polyfile/magic_defs/convex +69 -0
- polyfile/magic_defs/coverage +91 -0
- polyfile/magic_defs/cracklib +14 -0
- polyfile/magic_defs/crypto +31 -0
- polyfile/magic_defs/csv +8 -0
- polyfile/magic_defs/ctags +6 -0
- polyfile/magic_defs/ctf +23 -0
- polyfile/magic_defs/cubemap +8 -0
- polyfile/magic_defs/cups +56 -0
- polyfile/magic_defs/dact +11 -0
- polyfile/magic_defs/database +886 -0
- polyfile/magic_defs/dataone +47 -0
- polyfile/magic_defs/dbpf +15 -0
- polyfile/magic_defs/der +146 -0
- polyfile/magic_defs/diamond +12 -0
- polyfile/magic_defs/dif +33 -0
- polyfile/magic_defs/diff +41 -0
- polyfile/magic_defs/digital +59 -0
- polyfile/magic_defs/dolby +69 -0
- polyfile/magic_defs/dsf +25 -0
- polyfile/magic_defs/dump +96 -0
- polyfile/magic_defs/dwarfs +45 -0
- polyfile/magic_defs/dyadic +61 -0
- polyfile/magic_defs/ebml +8 -0
- polyfile/magic_defs/edid +11 -0
- polyfile/magic_defs/editors +43 -0
- polyfile/magic_defs/efi +15 -0
- polyfile/magic_defs/elf +379 -0
- polyfile/magic_defs/encore +22 -0
- polyfile/magic_defs/epoc +62 -0
- polyfile/magic_defs/erlang +21 -0
- polyfile/magic_defs/espressif +57 -0
- polyfile/magic_defs/esri +28 -0
- polyfile/magic_defs/etf +33 -0
- polyfile/magic_defs/fcs +9 -0
- polyfile/magic_defs/filesystems +2694 -0
- polyfile/magic_defs/finger +16 -0
- polyfile/magic_defs/firmware +133 -0
- polyfile/magic_defs/flash +62 -0
- polyfile/magic_defs/flif +36 -0
- polyfile/magic_defs/fonts +449 -0
- polyfile/magic_defs/forth +82 -0
- polyfile/magic_defs/fortran +9 -0
- polyfile/magic_defs/frame +62 -0
- polyfile/magic_defs/freebsd +164 -0
- polyfile/magic_defs/fsav +128 -0
- polyfile/magic_defs/fusecompress +12 -0
- polyfile/magic_defs/games +696 -0
- polyfile/magic_defs/gcc +17 -0
- polyfile/magic_defs/gconv +10 -0
- polyfile/magic_defs/gentoo +85 -0
- polyfile/magic_defs/geo +166 -0
- polyfile/magic_defs/geos +20 -0
- polyfile/magic_defs/gimp +77 -0
- polyfile/magic_defs/git +13 -0
- polyfile/magic_defs/glibc +21 -0
- polyfile/magic_defs/gnome +59 -0
- polyfile/magic_defs/gnu +173 -0
- polyfile/magic_defs/gnumeric +8 -0
- polyfile/magic_defs/gpt +240 -0
- polyfile/magic_defs/gpu +28 -0
- polyfile/magic_defs/grace +21 -0
- polyfile/magic_defs/graphviz +12 -0
- polyfile/magic_defs/gringotts +48 -0
- polyfile/magic_defs/guile +13 -0
- polyfile/magic_defs/hardware +12 -0
- polyfile/magic_defs/hitachi-sh +30 -0
- polyfile/magic_defs/hp +433 -0
- polyfile/magic_defs/human68k +26 -0
- polyfile/magic_defs/ibm370 +52 -0
- polyfile/magic_defs/ibm6000 +35 -0
- polyfile/magic_defs/icc +214 -0
- polyfile/magic_defs/iff +80 -0
- polyfile/magic_defs/images +4210 -0
- polyfile/magic_defs/inform +9 -0
- polyfile/magic_defs/intel +310 -0
- polyfile/magic_defs/interleaf +9 -0
- polyfile/magic_defs/island +10 -0
- polyfile/magic_defs/ispell +63 -0
- polyfile/magic_defs/isz +15 -0
- polyfile/magic_defs/java +52 -0
- polyfile/magic_defs/javascript +171 -0
- polyfile/magic_defs/jpeg +252 -0
- polyfile/magic_defs/json +8 -0
- polyfile/magic_defs/karma +9 -0
- polyfile/magic_defs/kde +11 -0
- polyfile/magic_defs/keepass +20 -0
- polyfile/magic_defs/kerberos +45 -0
- polyfile/magic_defs/kicad +85 -0
- polyfile/magic_defs/kml +34 -0
- polyfile/magic_defs/lammps +64 -0
- polyfile/magic_defs/lecter +6 -0
- polyfile/magic_defs/lex +12 -0
- polyfile/magic_defs/lif +50 -0
- polyfile/magic_defs/linux +557 -0
- polyfile/magic_defs/lisp +78 -0
- polyfile/magic_defs/llvm +22 -0
- polyfile/magic_defs/locoscript +12 -0
- polyfile/magic_defs/lua +31 -0
- polyfile/magic_defs/luks +126 -0
- polyfile/magic_defs/m4 +11 -0
- polyfile/magic_defs/mach +303 -0
- polyfile/magic_defs/macintosh +505 -0
- polyfile/magic_defs/macos +7 -0
- polyfile/magic_defs/magic +10 -0
- polyfile/magic_defs/magic.mgc +0 -0
- polyfile/magic_defs/mail.news +132 -0
- polyfile/magic_defs/make +21 -0
- polyfile/magic_defs/map +413 -0
- polyfile/magic_defs/maple +109 -0
- polyfile/magic_defs/marc21 +30 -0
- polyfile/magic_defs/mathcad +8 -0
- polyfile/magic_defs/mathematica +188 -0
- polyfile/magic_defs/matroska +17 -0
- polyfile/magic_defs/mcrypt +52 -0
- polyfile/magic_defs/measure +44 -0
- polyfile/magic_defs/mercurial +13 -0
- polyfile/magic_defs/metastore +8 -0
- polyfile/magic_defs/meteorological +53 -0
- polyfile/magic_defs/microfocus +21 -0
- polyfile/magic_defs/mime +9 -0
- polyfile/magic_defs/mips +120 -0
- polyfile/magic_defs/mirage +8 -0
- polyfile/magic_defs/misctools +140 -0
- polyfile/magic_defs/mkid +11 -0
- polyfile/magic_defs/mlssa +8 -0
- polyfile/magic_defs/mmdf +6 -0
- polyfile/magic_defs/modem +92 -0
- polyfile/magic_defs/modulefile +9 -0
- polyfile/magic_defs/motorola +71 -0
- polyfile/magic_defs/mozilla +37 -0
- polyfile/magic_defs/msdos +2304 -0
- polyfile/magic_defs/msooxml +68 -0
- polyfile/magic_defs/msvc +222 -0
- polyfile/magic_defs/msx +309 -0
- polyfile/magic_defs/mup +24 -0
- polyfile/magic_defs/music +17 -0
- polyfile/magic_defs/nasa +7 -0
- polyfile/magic_defs/natinst +24 -0
- polyfile/magic_defs/ncr +49 -0
- polyfile/magic_defs/neko +12 -0
- polyfile/magic_defs/netbsd +251 -0
- polyfile/magic_defs/netscape +26 -0
- polyfile/magic_defs/netware +11 -0
- polyfile/magic_defs/news +13 -0
- polyfile/magic_defs/nifty +202 -0
- polyfile/magic_defs/nim-lang +29 -0
- polyfile/magic_defs/nitpicker +14 -0
- polyfile/magic_defs/numpy +9 -0
- polyfile/magic_defs/oasis +12 -0
- polyfile/magic_defs/ocaml +14 -0
- polyfile/magic_defs/octave +6 -0
- polyfile/magic_defs/ole2compounddocs +760 -0
- polyfile/magic_defs/olf +98 -0
- polyfile/magic_defs/openfst +17 -0
- polyfile/magic_defs/opentimestamps +16 -0
- polyfile/magic_defs/oric +16 -0
- polyfile/magic_defs/os2 +186 -0
- polyfile/magic_defs/os400 +39 -0
- polyfile/magic_defs/os9 +80 -0
- polyfile/magic_defs/osf1 +10 -0
- polyfile/magic_defs/palm +156 -0
- polyfile/magic_defs/parix +13 -0
- polyfile/magic_defs/parrot +22 -0
- polyfile/magic_defs/pascal +39 -0
- polyfile/magic_defs/pbf +11 -0
- polyfile/magic_defs/pbm +8 -0
- polyfile/magic_defs/pc88 +24 -0
- polyfile/magic_defs/pc98 +77 -0
- polyfile/magic_defs/pci_ids +116 -0
- polyfile/magic_defs/pcjr +8 -0
- polyfile/magic_defs/pdf +51 -0
- polyfile/magic_defs/pdp +42 -0
- polyfile/magic_defs/perl +100 -0
- polyfile/magic_defs/pgf +52 -0
- polyfile/magic_defs/pgp +581 -0
- polyfile/magic_defs/pgp-binary-keys +388 -0
- polyfile/magic_defs/pkgadd +7 -0
- polyfile/magic_defs/plan9 +25 -0
- polyfile/magic_defs/playdate +57 -0
- polyfile/magic_defs/plus5 +18 -0
- polyfile/magic_defs/pmem +46 -0
- polyfile/magic_defs/polyfile_zip +5 -0
- polyfile/magic_defs/polyml +23 -0
- polyfile/magic_defs/printer +269 -0
- polyfile/magic_defs/project +10 -0
- polyfile/magic_defs/psdbms +14 -0
- polyfile/magic_defs/psl +14 -0
- polyfile/magic_defs/pulsar +13 -0
- polyfile/magic_defs/puzzle +17 -0
- polyfile/magic_defs/pwsafe +14 -0
- polyfile/magic_defs/pyramid +12 -0
- polyfile/magic_defs/python +305 -0
- polyfile/magic_defs/qt +30 -0
- polyfile/magic_defs/revision +66 -0
- polyfile/magic_defs/riff +840 -0
- polyfile/magic_defs/rinex +44 -0
- polyfile/magic_defs/ringdove +45 -0
- polyfile/magic_defs/rpi +52 -0
- polyfile/magic_defs/rpm +45 -0
- polyfile/magic_defs/rpmsg +7 -0
- polyfile/magic_defs/rst +11 -0
- polyfile/magic_defs/rtf +94 -0
- polyfile/magic_defs/ruby +55 -0
- polyfile/magic_defs/rust +21 -0
- polyfile/magic_defs/sc +7 -0
- polyfile/magic_defs/sccs +24 -0
- polyfile/magic_defs/scientific +144 -0
- polyfile/magic_defs/securitycerts +6 -0
- polyfile/magic_defs/selinux +24 -0
- polyfile/magic_defs/sendmail +37 -0
- polyfile/magic_defs/sequent +42 -0
- polyfile/magic_defs/sereal +35 -0
- polyfile/magic_defs/sgi +144 -0
- polyfile/magic_defs/sgml +161 -0
- polyfile/magic_defs/sharc +23 -0
- polyfile/magic_defs/sinclair +40 -0
- polyfile/magic_defs/sisu +18 -0
- polyfile/magic_defs/sketch +6 -0
- polyfile/magic_defs/smalltalk +25 -0
- polyfile/magic_defs/smile +34 -0
- polyfile/magic_defs/sniffer +482 -0
- polyfile/magic_defs/softquad +40 -0
- polyfile/magic_defs/sosi +40 -0
- polyfile/magic_defs/spec +21 -0
- polyfile/magic_defs/spectrum +184 -0
- polyfile/magic_defs/sql +288 -0
- polyfile/magic_defs/ssh +39 -0
- polyfile/magic_defs/ssl +20 -0
- polyfile/magic_defs/statistics +45 -0
- polyfile/magic_defs/subtitle +38 -0
- polyfile/magic_defs/sun +141 -0
- polyfile/magic_defs/svf +5 -0
- polyfile/magic_defs/sylk +36 -0
- polyfile/magic_defs/symbos +42 -0
- polyfile/magic_defs/sysex +429 -0
- polyfile/magic_defs/tcl +29 -0
- polyfile/magic_defs/teapot +6 -0
- polyfile/magic_defs/terminfo +63 -0
- polyfile/magic_defs/tex +141 -0
- polyfile/magic_defs/tgif +7 -0
- polyfile/magic_defs/ti-8x +239 -0
- polyfile/magic_defs/timezone +42 -0
- polyfile/magic_defs/tplink +95 -0
- polyfile/magic_defs/troff +38 -0
- polyfile/magic_defs/tuxedo +8 -0
- polyfile/magic_defs/typeset +8 -0
- polyfile/magic_defs/uf2 +72 -0
- polyfile/magic_defs/unicode +15 -0
- polyfile/magic_defs/unisig +12 -0
- polyfile/magic_defs/unknown +34 -0
- polyfile/magic_defs/usd +21 -0
- polyfile/magic_defs/uterus +16 -0
- polyfile/magic_defs/uuencode +28 -0
- polyfile/magic_defs/vacuum-cleaner +54 -0
- polyfile/magic_defs/varied.out +46 -0
- polyfile/magic_defs/varied.script +21 -0
- polyfile/magic_defs/vax +32 -0
- polyfile/magic_defs/vicar +17 -0
- polyfile/magic_defs/virtual +307 -0
- polyfile/magic_defs/virtutech +12 -0
- polyfile/magic_defs/visx +32 -0
- polyfile/magic_defs/vms +30 -0
- polyfile/magic_defs/vmware +6 -0
- polyfile/magic_defs/vorbis +155 -0
- polyfile/magic_defs/vxl +14 -0
- polyfile/magic_defs/warc +16 -0
- polyfile/magic_defs/weak +16 -0
- polyfile/magic_defs/web +18 -0
- polyfile/magic_defs/webassembly +17 -0
- polyfile/magic_defs/windows +1811 -0
- polyfile/magic_defs/wireless +7 -0
- polyfile/magic_defs/wordprocessors +630 -0
- polyfile/magic_defs/wsdl +23 -0
- polyfile/magic_defs/x68000 +25 -0
- polyfile/magic_defs/xdelta +13 -0
- polyfile/magic_defs/xenix +106 -0
- polyfile/magic_defs/xilinx +58 -0
- polyfile/magic_defs/xo65 +37 -0
- polyfile/magic_defs/xwindows +43 -0
- polyfile/magic_defs/yara +17 -0
- polyfile/magic_defs/zfs +96 -0
- polyfile/magic_defs/zilog +12 -0
- polyfile/magic_defs/zip +126 -0
- polyfile/magic_defs/zyxel +17 -0
- polyfile/nes.py +144 -0
- polyfile/nitf.py +15 -0
- polyfile/pdf.py +1264 -0
- polyfile/pickles.py +45 -0
- polyfile/polyfile.py +409 -0
- polyfile/profiling.py +115 -0
- polyfile/repl.py +624 -0
- polyfile/search.py +310 -0
- polyfile/serialization.py +323 -0
- polyfile/structmatcher.py +46 -0
- polyfile/structs.py +281 -0
- polyfile/templates/download.js +162 -0
- polyfile/templates/hexdump.css +268 -0
- polyfile/templates/hexdump.js +756 -0
- polyfile/templates/jquery-3.4.1.min.js +2 -0
- polyfile/templates/template.html +119 -0
- polyfile/wildcards.py +62 -0
- polyfile/zipmatcher.py +183 -0
- polyfile_weave-0.5.5.dist-info/METADATA +173 -0
- polyfile_weave-0.5.5.dist-info/RECORD +585 -0
- polyfile_weave-0.5.5.dist-info/WHEEL +5 -0
- polyfile_weave-0.5.5.dist-info/entry_points.txt +2 -0
- polyfile_weave-0.5.5.dist-info/licenses/LICENSE +202 -0
- polyfile_weave-0.5.5.dist-info/top_level.txt +2 -0
- polymerge/__init__.py +1 -0
- polymerge/__main__.py +296 -0
- polymerge/cfg.py +127 -0
- polymerge/polymerge.py +227 -0
- polymerge/polytracker.py +190 -0
polyfile/magic.py
ADDED
|
@@ -0,0 +1,2983 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A pure Python implementation of libmagic.
|
|
3
|
+
|
|
4
|
+
This is to avoid having libmagic be a dependency, as well as to add the ability for searching for matches at arbitrary
|
|
5
|
+
byte offsets.
|
|
6
|
+
|
|
7
|
+
This implementation is also optimized to only test for the file's MIME types; it skips all of the tests for printing
|
|
8
|
+
details about the file.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
import csv
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from enum import Enum, IntFlag
|
|
16
|
+
from importlib import resources
|
|
17
|
+
from io import StringIO
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
import re
|
|
22
|
+
import struct
|
|
23
|
+
import sys
|
|
24
|
+
from time import gmtime, localtime, strftime
|
|
25
|
+
from typing import (
|
|
26
|
+
Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, Type, TypeVar, Union
|
|
27
|
+
)
|
|
28
|
+
from uuid import UUID
|
|
29
|
+
|
|
30
|
+
from chardet.universaldetector import UniversalDetector
|
|
31
|
+
|
|
32
|
+
from .arithmetic import CStyleInt, make_c_style_int
|
|
33
|
+
from .fileutils import Streamable
|
|
34
|
+
from .iterators import LazyIterableSet
|
|
35
|
+
from .logger import getStatusLogger, TRACE
|
|
36
|
+
from .repl import ANSIColor, ANSIWriter
|
|
37
|
+
|
|
38
|
+
from . import magic_defs
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
if sys.version_info < (3, 9):
|
|
42
|
+
from typing import Pattern
|
|
43
|
+
else:
|
|
44
|
+
from re import Pattern
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
log = getStatusLogger("libmagic")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
if sys.version_info >= (3, 11):
    def get_resource_path(name: str) -> Path:
        """Return the path of the resource called `name` inside the `magic_defs` package."""
        definition = resources.files(magic_defs).joinpath(name)
        with resources.as_file(definition) as resolved:
            # NOTE(review): the path is handed back from inside the `with`, so the context manager has
            # already exited when the caller uses it. Presumably fine when the package is installed as
            # regular files on disk -- confirm for zipped installs.
            return resolved

    def get_resource_contents(package):
        """Return a lazy iterable over the names of the regular files directly inside `package`."""
        return (entry.name for entry in resources.files(package).iterdir() if entry.is_file())
else:
    def get_resource_path(name: str) -> Path:
        """Return the path of the resource called `name` inside the `magic_defs` package."""
        with resources.path(magic_defs, name) as resolved:
            # NOTE(review): same caveat as the modern branch; the path outlives its context manager.
            return resolved

    def get_resource_contents(package):
        """Return the resource names reported by `importlib.resources.contents` for `package`."""
        return resources.contents(package)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _is_magic_definition(resource_name: str) -> bool:
    """Tell whether a resource of the `magic_defs` package should be loaded as a magic definition file.

    Hidden entries (names starting with ".") and the bundled non-definition files are rejected.
    """
    if resource_name.startswith("."):
        return False
    return resource_name not in ("COPYING", "magic.mgc", "__pycache__")


# Paths of every magic definition file shipped in the `magic_defs` package, in the order the
# package reports its contents.
MAGIC_DEFS: List[Path] = [
    get_resource_path(definition_name)
    for definition_name in get_resource_contents(magic_defs)
    if _is_magic_definition(definition_name)
]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
WHITESPACE: bytes = b" \r\t\n\v\f"
# Single-character escapes recognised directly after a backslash, mapped to the byte they produce.
ESCAPES = {
    "n": ord("\n"),
    "r": ord("\r"),
    "b": ord("\b"),
    "v": ord("\v"),
    "t": ord("\t"),
    "f": ord("\f")
}


def unescape(to_unescape: Union[str, bytes]) -> bytes:
    """Expand backslash escape sequences and return the resulting bytes.

    Supported escapes:

    * the single-character escapes listed in `ESCAPES`;
    * octal escapes of one to three digits (``\\1``, ``\\12``, ``\\123``); as in libmagic, values above
      ``\\377`` are truncated to a single byte;
    * hex escapes of one or two digits (``\\x4``, ``\\x41``), including libmagic's single digit form;
    * any other escaped character, which stands for itself (``\\\\`` is a backslash, ``\\8`` is ``8``).

    Digits are recognised by their ASCII byte value only. Classifying them with `str.isnumeric()` would
    also accept ``8``/``9`` as octal digits and Latin-1 bytes such as 0xB2 as digits, both of which make
    the `int()` conversion fail.

    :param to_unescape: the text to process; a `str` is encoded as UTF-8 first
    :return: the unescaped bytes
    :raises ValueError: if ``\\x`` is not followed by a hex digit, or if the input ends with a lone backslash
    """
    octal_digits = b"01234567"
    hex_digits = b"0123456789abcdefABCDEF"
    b = bytearray()
    # None: not inside an escape; "": just saw the backslash; otherwise the escape text collected so far
    escaped: Optional[str] = None
    if isinstance(to_unescape, str):
        to_unescape = to_unescape.encode("utf-8")
    for c in to_unescape:
        if escaped is not None:
            if not escaped:
                # the previous byte was the backslash and this is the first character of the escape
                char = chr(c)
                if char == "x" or c in octal_digits:
                    # start of a hex or octal escape; keep collecting digits
                    escaped = char
                else:
                    # a named escape, or any other character which simply stands for itself
                    b.append(ESCAPES.get(char, c))
                    escaped = None
                continue
            if escaped[0] == "x":
                if c in hex_digits:
                    if len(escaped) == 2:
                        # second hex digit: the escape is complete
                        b.append(int(f"{escaped[1:]}{chr(c)}", 16))
                        escaped = None
                    else:
                        escaped = f"{escaped}{chr(c)}"
                    continue
                if len(escaped) == 1:
                    raise ValueError(f"Invalid \\x hex escape in {to_unescape!r}")
                # single digit hex escape terminated by a non-hex byte; that byte is handled below
                b.append(int(escaped[1:], 16))
                escaped = None
            elif len(escaped) < 3 and c in octal_digits:
                escaped = f"{escaped}{chr(c)}"
                continue
            else:
                # octal escape terminated by this byte (or already three digits long);
                # the terminating byte is handled below
                b.append(int(escaped, 8) & 0xFF)
                escaped = None
        if c == ord("\\"):
            escaped = ""
        else:
            b.append(c)
    if escaped is not None:
        # the input ended in the middle of an escape sequence
        if not escaped:
            raise ValueError(f"Unterminated escape in {to_unescape!r}")
        if escaped[0] == "x":
            if len(escaped) == 1:
                raise ValueError(f"Invalid \\x hex escape in {to_unescape!r}")
            b.append(int(escaped[1:], 16))
        else:
            b.append(int(escaped, 8) & 0xFF)
    return bytes(b)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class TestResult(ABC):
    """Abstract outcome of running a magic test at a given offset.

    The truth value of a result (defined by subclasses through `__bool__`) says whether the test
    matched. Results are linked to the result of their parent test through `parent`.
    """

    def __init__(self, test: "MagicTest", offset: int, parent: Optional["TestResult"] = None):
        """Record the test, offset and parent, and tell the parent when this result is a match.

        :param test: the test that produced this result
        :param offset: the offset associated with this result
        :param parent: the result of the parent test, if any
        """
        self.test: MagicTest = test
        self.offset: int = offset
        self.parent: Optional["TestResult"] = parent
        if parent is not None and self:
            # unless this test is its own named test, it must sit exactly one level below its parent
            assert self.test.named_test is self.test or parent.test.level == self.test.level - 1
            if not isinstance(self.test, UseTest):
                # a match of a UseTest itself is not reported to the parent from here
                parent.child_matched = True
        self._child_matched: bool = False

    @abstractmethod
    def explain(self, writer: ANSIWriter, file: Streamable):
        """Write an explanation of this result to `writer`; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def child_matched(self) -> bool:
        """The flag most recently assigned through the `child_matched` setter (initially False)."""
        return self._child_matched

    @child_matched.setter
    def child_matched(self, did_match: bool):
        if did_match and isinstance(self.test, NamedTest):
            # the result of a named test forwards a match to its parent (asserted to be the result
            # of a UseTest) and to that result's own parent, when there is one
            use_result = self.parent
            assert isinstance(use_result.test, UseTest)
            use_result.child_matched = True
            grandparent = use_result.parent
            if grandparent is not None:
                grandparent.child_matched = True
        self._child_matched = did_match

    def __hash__(self):
        return hash((self.test, self.offset))

    def __eq__(self, other):
        if not isinstance(other, TestResult):
            return False
        return other.test == self.test and other.offset == self.offset

    @abstractmethod
    def __bool__(self):
        raise NotImplementedError()

    def __repr__(self):
        return f"{self.__class__.__name__}(test={self.test!r}, offset={self.offset}, parent={self.parent!r})"

    def __str__(self):
        if self.test.message is None:
            return f"Match[{self.offset}]"
        # TODO: substitute the result's value into the message where the message has a "%" placeholder
        return str(self.test.message)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class MatchedTest(TestResult):
    """A successful test result: always truthy, and carries the matched value and its length."""

    def __init__(
            self, test: "MagicTest",
            value: Any,
            offset: int,
            length: int,
            parent: Optional["TestResult"] = None
    ):
        """Create a matched result.

        :param test: the test that matched
        :param value: the value associated with the match
        :param offset: the offset at which the match was found
        :param length: the length of the match, reported in bytes by `explain`
        :param parent: the result of the parent test, if any
        """
        super().__init__(test=test, offset=offset, parent=parent)
        self.value: Any = value
        self.length: int = length

    def explain(self, writer: ANSIWriter, file: Streamable):
        """Write the explanation of the parent results, then of this match, to `writer`.

        For tests other than NamedTest and UseTest this also reports the matched length and offset
        and writes the surrounding context of `file`.
        """
        if self.parent is not None:
            self.parent.explain(writer, file=file)
        # the value returned by the test's `write` is used as the prefix of the lines written below
        indent = self.test.write(writer)
        if isinstance(self.test, (NamedTest, UseTest)):
            return
        writer.write(f"{indent}Matched ", bold=True, color=ANSIColor.GREEN)
        writer.write(str(self.length), bold=True)
        writer.write(f" byte{['','s'][self.length != 1]} at offset ", bold=True, color=ANSIColor.GREEN)
        writer.write(f"{self.offset}\n", bold=True)
        # split what is left of an 80 column budget evenly between both sides of the match
        surrounding = max(0, (80 - len(indent) - self.length) // 2)
        writer.write_context(
            file, offset=self.offset, context_bytes=surrounding, num_bytes=self.length, indent=indent
        )

    def __hash__(self):
        return hash((self.test, self.offset, self.length))

    def __eq__(self, other):
        if not isinstance(other, MatchedTest):
            return False
        return other.test == self.test and other.offset == self.offset and other.length == self.length

    def __bool__(self):
        return True

    def __repr__(self):
        return f"{self.__class__.__name__}(test={self.test!r}, offset={self.offset}, length={self.length}, " \
               f"parent={self.parent!r})"

    def __str__(self):
        if self.test.message is None:
            return f"Match[{self.offset}:{self.offset + self.length}]"
        # TODO: substitute `self.value` into the message where the message has a "%" placeholder
        return str(self.test.message)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class FailedTest(TestResult):
    """A falsy test result that carries the reason the test did not match."""

    def __init__(self, test: "MagicTest", offset: int, message: str, parent: Optional["TestResult"] = None):
        super().__init__(test=test, offset=offset, parent=parent)
        self.message: str = message

    def __bool__(self):
        return False

    def explain(self, writer: ANSIWriter, file: Streamable):
        """Write a single dimmed line stating the test, the offset and the failure reason."""
        reason = f"{self.test} did not match at offset {self.offset} because {self.message}\n"
        writer.write(reason, dim=True)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class Endianness(Enum):
    """Byte orders; each value is a short marker string for that order."""

    NATIVE = "="
    LITTLE = "<"
    BIG = ">"
    PDP = "me"
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def parse_numeric(text: Union[str, bytes]) -> int:
    """
    Parse an integer literal.

    Accepts an optional leading sign, an optional trailing ``L``, ``0x``/``0X`` hexadecimal
    (optionally followed by a trailing ``h``), octal when there is a leading ``0`` followed by more
    characters, and decimal otherwise. ``bytes`` input is decoded as UTF-8 first.

    Raises ``ValueError`` (from ``int``) when the remaining text is not a valid number.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    literal = text.strip()

    negative = literal.startswith("-")
    if negative or literal.startswith("+"):
        literal = literal[1:]
    sign = -1 if negative else 1

    if literal.endswith("L"):
        literal = literal[:-1]

    if literal.startswith("0x") or literal.startswith("0X"):
        if literal.lower().endswith("h"):
            # Some hex constants now end with "h" 🤷
            # (see https://github.com/file/file/blob/7a4e60a8f56ed45f76f28d2812a88d82efdc4bb8/magic/Magdir/sniffer#L369)
            literal = literal[:-1]
        return int(literal, 16) * sign
    if len(literal) > 1 and literal.startswith("0"):
        return int(literal, 8) * sign
    return int(literal) * sign
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class Offset(ABC):
    """Abstract base for the ways a test can specify where in the data it applies."""

    @abstractmethod
    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """Resolve this offset to an integer position for ``data``, given the previous result."""
        raise NotImplementedError()

    @staticmethod
    def parse(offset: str) -> "Offset":
        """
        Build the Offset described by ``offset``.

        A leading ``&`` yields a RelativeOffset wrapping the rest, a leading ``(`` an IndirectOffset,
        a leading ``-`` a NegativeOffset, and anything else an AbsoluteOffset.
        """
        if offset.startswith("&"):
            inner = Offset.parse(offset[1:])
            return RelativeOffset(inner)
        if offset.startswith("("):
            return IndirectOffset.parse(offset)
        if offset.startswith("-"):
            magnitude = parse_numeric(offset[1:])
            return NegativeOffset(magnitude)
        return AbsoluteOffset(parse_numeric(offset))
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class InvalidOffsetError(IndexError):
    """Raised when an offset cannot be resolved; the offending Offset is kept on ``offset``."""

    def __init__(self, message: Optional[str] = None, offset: Optional[Offset] = None):
        if message is None:
            # fall back to a generic message, mentioning the offset when one was supplied
            message = "Invalid Offset" if offset is None else f"Invalid Offset: {offset!r}"
        super().__init__(message)
        self.offset: Optional[Offset] = offset
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class AbsoluteOffset(Offset):
    """A fixed position counted from the start of the data."""

    def __init__(self, offset: int):
        self.offset: int = offset

    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """Return the stored offset; raise InvalidOffsetError if it is at or past the end of ``data``
        and ``allow_invalid`` is false."""
        if allow_invalid or self.offset < len(data):
            return self.offset
        raise InvalidOffsetError(offset=self)

    def __repr__(self):
        return f"{self.__class__.__name__}(offset={self.offset})"

    def __str__(self):
        return str(self.offset)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
class NamedAbsoluteOffset(AbsoluteOffset):
    """An absolute offset inside a NamedTest, resolved relative to the offset of the invoking result."""

    def __init__(self, test: "NamedTest", offset: int):
        super().__init__(offset)
        self.test: NamedTest = test

    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """
        Walk up the result chain to the result produced by ``self.test``, take its parent (expected to
        be the UseTest result) and add the stored offset to that parent's offset.

        Raises ValueError when no such result is found and InvalidOffsetError when the sum is at or
        past the end of ``data`` (unless ``allow_invalid``).
        """
        named_result = last_match
        while named_result is not None and named_result.test is not self.test:
            named_result = named_result.parent

        # At this point, named_result should be equal to the match generated from the NamedTest,
        # and its parent should be the match associated with the UseTest
        use_result = None if named_result is None else named_result.parent
        if use_result is None:
            raise ValueError(f"Could not resolve the match associated with {self!r}")

        assert isinstance(use_result.test, UseTest)

        if not allow_invalid and use_result.offset + self.offset >= len(data):
            raise InvalidOffsetError(offset=self)
        return use_result.offset + self.offset

    def __repr__(self):
        return f"{self.__class__.__name__}(test={self.test!r}, offset={self.offset})"
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
class NegativeOffset(Offset):
    """An offset counted backwards from the end of the data."""

    def __init__(self, magnitude: int):
        # distance from the end of the data (stored without its sign)
        self.magnitude: int = magnitude

    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """
        Return ``len(data) - magnitude``.

        Raises InvalidOffsetError when the magnitude exceeds the length of ``data`` and
        ``allow_invalid`` is false.
        """
        if not allow_invalid and self.magnitude > len(data):
            raise InvalidOffsetError(offset=self)
        return len(data) - self.magnitude

    def __repr__(self):
        return f"{self.__class__.__name__}(magnitude={self.magnitude})"

    def __str__(self):
        # Include the sign: without it a negative offset prints exactly like an absolute one
        # (and a relative negative offset would print as "&4" rather than "&-4").
        return f"-{self.magnitude}"
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class RelativeOffset(Offset):
    """An offset measured from the end of the previous match."""

    def __init__(self, relative_to: Offset):
        # the wrapped offset supplies the (possibly negative) distance from the end of the last match
        self.relative_to: Offset = relative_to

    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """
        Return ``last_match.offset + last_match.length + difference``.

        ``difference`` is the negated magnitude for a wrapped NegativeOffset, otherwise the wrapped
        offset's own ``to_absolute`` value.

        Raises InvalidOffsetError when ``last_match`` is not a MatchedTest, or when the result lies
        outside ``0..len(data)`` and ``allow_invalid`` is false.
        """
        if isinstance(self.relative_to, NegativeOffset):
            difference = -self.relative_to.magnitude
        else:
            difference = self.relative_to.to_absolute(data, last_match)
        if not isinstance(last_match, MatchedTest):
            raise InvalidOffsetError(f"The last test was expected to be a match, but instead got {last_match!s}",
                                     offset=self)
        offset = last_match.offset + last_match.length + difference
        # The previous guard (`len(data) < offset < 0`) could never be true, so invalid offsets --
        # including negative ones, which would silently index from the end of the data -- slipped through.
        if not allow_invalid and (offset < 0 or offset > len(data)):
            raise InvalidOffsetError(offset=self)
        return offset

    def __repr__(self):
        return f"{self.__class__.__name__}(relative_to={self.relative_to})"

    def __str__(self):
        return f"&{self.relative_to}"
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
class IndirectOffset(Offset):
    """
    An offset whose value is read from the data itself.

    The wrapped ``offset`` says where to read; ``num_bytes``, ``endianness`` and ``signed`` say how to
    decode the integer stored there; ``post_process`` is applied to the decoded value.
    """

    # Sentinel ``num_bytes`` meaning "read an ASCII octal string" rather than a fixed-width integer
    OctalIndirectOffset = -1

    def __init__(self, offset: Offset, num_bytes: int, endianness: Endianness, signed: bool,
                 post_process: Callable[[int], int] = lambda n: n):
        """Raises ValueError for an endianness other than LITTLE/BIG or an unsupported ``num_bytes``."""
        self.offset: Offset = offset
        self.num_bytes: int = num_bytes
        self.endianness: Endianness = endianness
        self.signed: bool = signed
        self.post_process: Callable[[int], int] = post_process
        # compare against the enum class (the original reached BIG through the instance argument)
        if self.endianness != Endianness.LITTLE and self.endianness != Endianness.BIG:
            raise ValueError(f"Invalid endianness: {endianness!r}")
        elif num_bytes not in (1, 2, 4, 8, IndirectOffset.OctalIndirectOffset):
            raise ValueError(f"Invalid number of bytes: {num_bytes}")

    def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
        """
        Resolve the wrapped offset, decode the integer stored there and return it post-processed.

        For fixed-width reads that run past the end of ``data`` this returns ``len(data)`` when
        ``allow_invalid`` is true and raises InvalidOffsetError otherwise.
        """
        if self.num_bytes == IndirectOffset.OctalIndirectOffset:
            # Special case: This is for the new octal type used here:
            # https://github.com/file/file/blob/7a4e60a8f56ed45f76f28d2812a88d82efdc4bb8/magic/Magdir/gentoo#L81
            offset = self.offset.to_absolute(data, last_match)
            octal_string_end = offset
            while octal_string_end < len(data) and ord('0') <= data[octal_string_end] <= ord('7'):
                octal_string_end += 1
            value: Optional[int] = None
            if octal_string_end > offset:
                try:
                    # parse only the digits found at `offset`, not everything from the start of the data
                    value = int(data[offset:octal_string_end], 8)
                except ValueError:
                    pass
            if value is None:
                # NOTE: behavior kept from the original: a missing octal string yields 0 when
                # allow_invalid is set and len(data) otherwise (instead of raising)
                if allow_invalid:
                    value = 0
                else:
                    return len(data)
                    # raise ValueError(f"Invalid octal string expected for {self} at file offset {offset}")
            return self.post_process(value)
        elif self.num_bytes == 1:
            fmt = "B"
        elif self.num_bytes == 2:
            fmt = "H"
        elif self.num_bytes == 8:
            fmt = "Q"
        else:
            fmt = "I"
        if self.signed:
            # lowercase struct codes are the signed variants
            fmt = fmt.lower()
        if self.endianness == Endianness.LITTLE:
            fmt = f"<{fmt}"
        else:
            fmt = f">{fmt}"
        offset = self.offset.to_absolute(data, last_match)
        to_unpack = data[offset:offset + self.num_bytes]
        if len(to_unpack) < self.num_bytes:
            if allow_invalid:
                return len(data)
            else:
                raise InvalidOffsetError(offset=self)
        return self.post_process(struct.unpack(fmt, to_unpack)[0])

    NUMBER_PATTERN: str = r"(0[xX][\dA-Fa-f]+|\d+)L?"
    INDIRECT_OFFSET_PATTERN: Pattern[str] = re.compile(
        r"^\("
        rf"(?P<offset>&?-?{NUMBER_PATTERN})"
        r"((?P<signedness>[.,])(?P<type>[bBcCeEfFgGhHiILlmsSqQo]))?"
        rf"(?P<post_process>[*&/]?[+-]?({NUMBER_PATTERN}|\(-?{NUMBER_PATTERN}\)))?"
        r"\)$"
    )

    @classmethod
    def parse(cls, offset: str) -> "IndirectOffset":
        """
        Parse a parenthesised indirect offset specification.

        Raises ValueError when the text does not match INDIRECT_OFFSET_PATTERN or names an unsupported
        type, and NotImplementedError for the middle-endian type ``m``.
        """
        m = cls.INDIRECT_OFFSET_PATTERN.match(offset)
        if not m:
            raise ValueError(f"Invalid indirect offset: {offset!r}")
        t = m.group("type")
        if t is None:
            t = "I"
        if t == "m":
            raise NotImplementedError("TODO: Add support for middle endianness")
        elif t.islower():
            endianness = Endianness.LITTLE
        else:
            endianness = Endianness.BIG
        t = t.lower()
        if t in ("b", "c"):
            num_bytes = 1
        elif t in ("e", "f", "g", "q"):
            num_bytes = 8
        elif t in ("h", "s"):
            num_bytes = 2
        elif t in ("i", "l"):
            # TODO: Confirm that "l" should really be here
            num_bytes = 4
        elif t in ("o",):
            num_bytes = IndirectOffset.OctalIndirectOffset
        else:
            raise ValueError(f"Unsupported indirect specifier type: {m.group('type')!r}")
        pp = m.group("post_process")
        if pp is None:
            post_process = lambda n: n
        else:
            multiply = pp.startswith("*")
            bitwise_and = pp.startswith("&")
            divide = pp.startswith("/")
            if multiply or bitwise_and or divide:
                pp = pp[1:]
            if pp.startswith("+"):
                pp = pp[1:]
            if pp.startswith("(") and pp.endswith(")"):
                # some definition files like `msdos` have indirect offsets of the form: >>>(&0x0f.l+(-4))
                # Handle those nested parenthesis around the `(-4)` here. This is an undocumented part of the DSL,
                # so, TODO: confirm we are handling it properly and it's not something more complex like a nested
                # indirect offset
                pp = pp[1:-1]
            operand = parse_numeric(pp)
            if multiply:
                post_process = lambda n: n * operand
            elif bitwise_and:
                post_process = lambda n: n & operand
            elif divide:
                post_process = lambda n: n // operand
            else:
                # no operator prefix: the (signed) operand is added
                post_process = lambda n: n + operand
        return IndirectOffset(
            offset=Offset.parse(m.group("offset")),
            num_bytes=num_bytes,
            endianness=endianness,
            signed=m.group("signedness") == ",",
            post_process=post_process
        )

    def __repr__(self):
        return f"{self.__class__.__name__}(offset={self.offset!r}, num_bytes={self.num_bytes}, "\
               f"endianness={self.endianness!r}, signed={self.signed}, post_process={self.post_process!r})"

    def __str__(self):
        if self.num_bytes == IndirectOffset.OctalIndirectOffset:
            num_bytes = "o"
        else:
            num_bytes = str(self.num_bytes)
        return f"({self.offset!s}{['.', ','][self.signed]}{num_bytes}{self.endianness.value})"
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
class SourceInfo:
    """Where a definition came from: a file path, a line number and optionally the line's text."""

    def __init__(self, path: Path, line: int, original_line: Optional[str] = None):
        self.path: Path = path
        self.line: int = line
        self.original_line: Optional[str] = original_line

    def __repr__(self):
        fields = f"path={self.path!r}, line={self.line}, original_line={self.original_line!r}"
        return f"{self.__class__.__name__}({fields})"

    def __str__(self):
        # "path:line"
        return ":".join((f"{self.path!s}", f"{self.line}"))
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
class MatchContext:
    """The bytes being matched, together with the optional path they came from and matching options."""

    def __init__(self, data: bytes, path: Optional[Path] = None, only_match_mime: bool = False):
        self.data: bytes = data
        self.path: Optional[Path] = path
        self.only_match_mime: bool = only_match_mime

    def __getitem__(self, s: slice) -> "MatchContext":
        """Return a new context over ``data[s]`` with the same path and options.

        Raises ValueError when ``s`` is not a slice.
        """
        if not isinstance(s, slice):
            raise ValueError("Match contexts can only be sliced")
        return MatchContext(data=self.data[s], path=self.path, only_match_mime=self.only_match_mime)

    @property
    def is_executable(self) -> bool:
        """Whether any execute bit is set on ``path``; False (with a warning) when that is unknown."""
        if self.path is None:
            log.warning("Unable to determine if the input data is executable; assuming it is not.")
            return False
        try:
            return bool(self.path.stat().st_mode & 0o111)
        except OSError:
            # stat() can fail for more reasons than a missing file (e.g. permissions, or a pseudo-name
            # such as "<stdin>"); this is a best-effort check, so treat every such failure the same way
            log.warning(f"Unable to determine if the data from {self.path} is executable; assuming it is not.")
            return False

    @staticmethod
    def load(stream_or_path: Union[str, Path, BinaryIO], only_match_mime: bool = False) -> "MatchContext":
        """
        Read all data from a path or an already-open binary stream.

        For streams, the path is taken from the stream's ``name`` attribute when that is a ``str`` or
        ``Path``; otherwise the context has no path.
        """
        if isinstance(stream_or_path, (str, Path)):
            with open(stream_or_path, "rb") as f:
                return MatchContext.load(f, only_match_mime)
        name = getattr(stream_or_path, "name", None)
        # `name` is not always path-like: files opened from a descriptor report an int, which Path() rejects
        path: Optional[Path] = Path(name) if isinstance(name, (str, Path)) else None
        return MatchContext(stream_or_path.read(), path, only_match_mime)
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
class Message(ABC):
    """Abstract text attached to a test that is resolved against a MatchContext."""

    @abstractmethod
    def resolve(self, context: MatchContext) -> str:
        """Return the concrete string for ``context``."""
        raise NotImplementedError()

    @abstractmethod
    def possibilities(self) -> Iterator[str]:
        """Yield every string that ``resolve`` could return."""
        raise NotImplementedError()

    @staticmethod
    def parse(message: str) -> "Message":
        """Parse ``message`` as a TernaryExecutableMessage, falling back to a ConstantMessage
        when that parser raises ValueError."""
        try:
            parsed: Message = TernaryExecutableMessage.parse(message)
        except ValueError:
            parsed = ConstantMessage(message)
        return parsed
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
class ConstantMessage(Message):
    """A message that is the same string regardless of context."""

    def __init__(self, message: str):
        self.message: str = message

    def possibilities(self) -> Iterator[str]:
        yield from (self.message,)

    def resolve(self, context: MatchContext) -> str:
        return self.message

    def __eq__(self, other):
        if not isinstance(other, ConstantMessage):
            return False
        return other.message == self.message

    def __str__(self):
        return self.message
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
class TernaryMessage(Message, ABC):
    """Base for messages that resolve to one of two strings."""

    def __init__(self, true_value: str, false_value: str):
        self.true_value: str = true_value
        self.false_value: str = false_value

    def possibilities(self) -> Iterator[str]:
        """Yield the true value, then the false value."""
        yield from (self.true_value, self.false_value)

    def __eq__(self, other):
        # equal only to a TernaryMessage of exactly the same class carrying the same two values
        if not isinstance(other, TernaryMessage):
            return False
        return other.false_value == self.false_value and \
            other.true_value == self.true_value and other.__class__ == self.__class__
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
class TernaryExecutableMessage(TernaryMessage):
    """A two-way message chosen by whether the matched context is executable."""

    def resolve(self, context: MatchContext) -> str:
        return self.true_value if context.is_executable else self.false_value

    TERNARY_EXECUTABLE_PATTERN: Pattern[str] = re.compile(
        r"^(?P<before>.*?)\${x\?(?P<true>[^:]+):(?P<false>[^}]+)}(?P<after>.*)$"
    )

    @staticmethod
    def parse(message: str) -> "TernaryExecutableMessage":
        """
        Expand a ``${x?TRUE:FALSE}`` placeholder into the two complete messages it can produce.

        The text before and after the placeholder is kept in both alternatives.
        Raises ValueError when ``message`` contains no such placeholder.
        """
        found = TernaryExecutableMessage.TERNARY_EXECUTABLE_PATTERN.match(message)
        if found is None:
            raise ValueError(f"Invalid ternary message: {message!r}")
        prefix, suffix = found.group("before"), found.group("after")
        if_executable = f"{prefix}{found.group('true')}{suffix}"
        if_not_executable = f"{prefix}{found.group('false')}{suffix}"
        return TernaryExecutableMessage(true_value=if_executable, false_value=if_not_executable)

    def __str__(self):
        return f"${{x?{self.true_value}:{self.false_value}}}"
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# Registry of MagicTest subclasses: MagicTest.__init_subclass__ adds every subclass whose
# AUTO_REGISTER_TEST attribute is true.
TEST_TYPES: Set[Type["MagicTest"]] = set()
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
class Comment:
    """A comment's text plus, optionally, the source location it came from."""

    def __init__(self, message: str, source_info: Optional[SourceInfo] = None):
        self.source_info: Optional[SourceInfo] = source_info
        self.message: str = message

    def __str__(self):
        # only the text; the source location is not included
        return self.message
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
class TestType(IntFlag):
    """Bit flags classifying a test as binary and/or text; ``UNKNOWN`` is the empty flag."""

    UNKNOWN = 0
    BINARY = 1
    TEXT = 2
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
class MagicTest(ABC):
|
|
686
|
+
AUTO_REGISTER_TEST: bool = True
|
|
687
|
+
|
|
688
|
+
def __init__(
|
|
689
|
+
self,
|
|
690
|
+
offset: Offset,
|
|
691
|
+
mime: Optional[Union[str, TernaryExecutableMessage]] = None,
|
|
692
|
+
extensions: Iterable[str] = (),
|
|
693
|
+
message: Union[str, Message] = "",
|
|
694
|
+
parent: Optional["MagicTest"] = None,
|
|
695
|
+
comments: Iterable[Comment] = ()
|
|
696
|
+
):
|
|
697
|
+
self.offset: Offset = offset
|
|
698
|
+
self._mime: Optional[Message] = None
|
|
699
|
+
self.extensions: Set[str] = set(extensions)
|
|
700
|
+
if isinstance(message, Message):
|
|
701
|
+
self._message: Message = message
|
|
702
|
+
else:
|
|
703
|
+
self._message = Message.parse(message)
|
|
704
|
+
self._parent: Optional[MagicTest] = parent
|
|
705
|
+
self.children: List[MagicTest] = []
|
|
706
|
+
if parent is not None:
|
|
707
|
+
self.level: int = self.parent.level + 1
|
|
708
|
+
parent.children.append(self)
|
|
709
|
+
self.named_test: Optional[NamedTest] = parent.named_test
|
|
710
|
+
if self.named_test is not None and isinstance(offset, AbsoluteOffset):
|
|
711
|
+
self.offset = NamedAbsoluteOffset(self.named_test, offset.offset)
|
|
712
|
+
if mime is not None:
|
|
713
|
+
parent.can_match_mime = True
|
|
714
|
+
else:
|
|
715
|
+
self.level = 0
|
|
716
|
+
self.named_test: Optional[NamedTest] = None
|
|
717
|
+
self.can_match_mime: bool = mime is not None
|
|
718
|
+
"""
|
|
719
|
+
Whether or not this test or any of its descendants can match a MIME type.
|
|
720
|
+
This is currently set after parsing all of the definition files.
|
|
721
|
+
Any custom implementation should set it manually after this object is created.
|
|
722
|
+
|
|
723
|
+
"""
|
|
724
|
+
self.can_be_indirect: bool = False
|
|
725
|
+
"""
|
|
726
|
+
Whether or not this test or any of its descendants can be an indirect test.
|
|
727
|
+
This is currently set after parsing all of the definition files.
|
|
728
|
+
Any custom implementation should set it manually after this object is created.
|
|
729
|
+
|
|
730
|
+
"""
|
|
731
|
+
self.mime = mime
|
|
732
|
+
self.source_info: Optional[SourceInfo] = None
|
|
733
|
+
self.comments: Tuple[Comment, ...] = tuple(comments)
|
|
734
|
+
self._type: TestType = TestType.UNKNOWN
|
|
735
|
+
|
|
736
|
+
def __init_subclass__(cls, **kwargs):
|
|
737
|
+
if cls.AUTO_REGISTER_TEST:
|
|
738
|
+
TEST_TYPES.add(cls)
|
|
739
|
+
return super().__init_subclass__(**kwargs)
|
|
740
|
+
|
|
741
|
+
@property
|
|
742
|
+
def message(self) -> Message:
|
|
743
|
+
return self._message
|
|
744
|
+
|
|
745
|
+
@message.setter
|
|
746
|
+
def message(self, new_value: Message):
|
|
747
|
+
self._message = new_value
|
|
748
|
+
|
|
749
|
+
@property
|
|
750
|
+
def test_type(self) -> TestType:
|
|
751
|
+
if self._type == TestType.UNKNOWN:
|
|
752
|
+
if hasattr(self, "__calculating_test_type") and getattr(self, "__calculating_test_type"):
|
|
753
|
+
return TestType.UNKNOWN
|
|
754
|
+
setattr(self, "__calculating_test_type", True)
|
|
755
|
+
if self.can_be_indirect:
|
|
756
|
+
# indirect tests can execute any other (binary) test, so classify ourselves as binary
|
|
757
|
+
self._type = TestType.BINARY
|
|
758
|
+
else:
|
|
759
|
+
if any(bool(child.test_type & TestType.BINARY) for child in self.children):
|
|
760
|
+
self._type = TestType.BINARY
|
|
761
|
+
else:
|
|
762
|
+
self._type = self.subtest_type()
|
|
763
|
+
if (self._type == TestType.UNKNOWN and self.children) or bool(self._type & TestType.TEXT):
|
|
764
|
+
# A pattern is considered to be a text test when all its patterns are text patterns;
|
|
765
|
+
# otherwise, it is considered to be a binary pattern.
|
|
766
|
+
if all(bool(child.test_type & TestType.TEXT) for child in self.children):
|
|
767
|
+
self._type = TestType.TEXT
|
|
768
|
+
else:
|
|
769
|
+
self._type = TestType.UNKNOWN
|
|
770
|
+
delattr(self, "__calculating_test_type")
|
|
771
|
+
return self._type
|
|
772
|
+
|
|
773
|
+
@test_type.setter
|
|
774
|
+
def test_type(self, value: TestType):
|
|
775
|
+
if self._type != TestType.UNKNOWN:
|
|
776
|
+
if value != self._type:
|
|
777
|
+
raise ValueError(f"Cannot assign type {value} to test {self} because it already has value {value}")
|
|
778
|
+
else:
|
|
779
|
+
self._type = value
|
|
780
|
+
|
|
781
|
+
@abstractmethod
|
|
782
|
+
def subtest_type(self) -> TestType:
|
|
783
|
+
raise NotImplementedError()
|
|
784
|
+
|
|
785
|
+
@property
|
|
786
|
+
def parent(self) -> Optional["MagicTest"]:
|
|
787
|
+
return self._parent
|
|
788
|
+
|
|
789
|
+
def ancestors(self) -> Iterator["MagicTest"]:
|
|
790
|
+
"""Yields all ancestors of this test. NamedTest will also include all UseTest ancestors that call it."""
|
|
791
|
+
stack: List[MagicTest] = [self]
|
|
792
|
+
history: Set[MagicTest] = set(stack)
|
|
793
|
+
while stack:
|
|
794
|
+
test = stack.pop()
|
|
795
|
+
if test is not self:
|
|
796
|
+
yield test
|
|
797
|
+
if isinstance(test, NamedTest):
|
|
798
|
+
new_tests = test.used_by - history
|
|
799
|
+
stack.extend(new_tests)
|
|
800
|
+
history |= new_tests
|
|
801
|
+
if test.parent is not None and test.parent not in history:
|
|
802
|
+
stack.append(test.parent)
|
|
803
|
+
history.add(test.parent)
|
|
804
|
+
|
|
805
|
+
def descendants(self) -> Iterator["MagicTest"]:
|
|
806
|
+
"""
|
|
807
|
+
Yields all descendants of this test.
|
|
808
|
+
UseTests will also include all referenced NamedTests and their descendants.
|
|
809
|
+
|
|
810
|
+
"""
|
|
811
|
+
stack: List[MagicTest] = [self]
|
|
812
|
+
history: Set[MagicTest] = set(stack)
|
|
813
|
+
while stack:
|
|
814
|
+
test = stack.pop()
|
|
815
|
+
if test is not self:
|
|
816
|
+
yield test
|
|
817
|
+
new_tests = [child for child in test.children if child not in history]
|
|
818
|
+
stack.extend(reversed(new_tests))
|
|
819
|
+
history |= set(new_tests)
|
|
820
|
+
if isinstance(test, UseTest):
|
|
821
|
+
stack.append(test.referenced_test)
|
|
822
|
+
history.add(test.referenced_test)
|
|
823
|
+
|
|
824
|
+
def referenced_tests(self) -> Set["NamedTest"]:
|
|
825
|
+
result: Set[NamedTest] = set()
|
|
826
|
+
for child in self.children:
|
|
827
|
+
result |= child.referenced_tests()
|
|
828
|
+
return result
|
|
829
|
+
|
|
830
|
+
@property
|
|
831
|
+
def mime(self) -> Optional[Message]:
|
|
832
|
+
return self._mime
|
|
833
|
+
|
|
834
|
+
@mime.setter
|
|
835
|
+
def mime(self, new_mime: Optional[Union[str, Message]]):
|
|
836
|
+
if isinstance(new_mime, str):
|
|
837
|
+
new_mime = Message.parse(new_mime)
|
|
838
|
+
if self._mime is not None:
|
|
839
|
+
if self._mime == new_mime:
|
|
840
|
+
return
|
|
841
|
+
raise ValueError("The mime type of a test may not be changed once it is set")
|
|
842
|
+
elif new_mime is None:
|
|
843
|
+
# the mime is already None, and we are setting it to None, so just ignore
|
|
844
|
+
return
|
|
845
|
+
self._mime = new_mime
|
|
846
|
+
self.can_match_mime = True
|
|
847
|
+
|
|
848
|
+
def _mimetypes(self) -> Iterator[str]:
|
|
849
|
+
"""Yields all possible MIME types that this test or any of its descendants could match against"""
|
|
850
|
+
if not self.can_match_mime:
|
|
851
|
+
return
|
|
852
|
+
yielded: Set[str] = set()
|
|
853
|
+
if self.mime is not None:
|
|
854
|
+
yielded |= set(self.mime.possibilities())
|
|
855
|
+
yield from yielded
|
|
856
|
+
for d in self.descendants():
|
|
857
|
+
if d.mime is not None:
|
|
858
|
+
possibilities = set(d.mime.possibilities())
|
|
859
|
+
new_mimes = possibilities - yielded
|
|
860
|
+
yield from new_mimes
|
|
861
|
+
yielded |= new_mimes
|
|
862
|
+
|
|
863
|
+
def mimetypes(self) -> LazyIterableSet[str]:
|
|
864
|
+
"""Returns the set of all possible MIME types that this test or any of its descendants could match against"""
|
|
865
|
+
return LazyIterableSet(self._mimetypes())
|
|
866
|
+
|
|
867
|
+
def _all_extensions(self) -> Iterator[str]:
|
|
868
|
+
"""Yields all possible extensions that this test or any of its descendants could match against"""
|
|
869
|
+
yield from self.extensions
|
|
870
|
+
yielded = set(self.extensions)
|
|
871
|
+
for d in self.descendants():
|
|
872
|
+
new_extensions = d.extensions - yielded
|
|
873
|
+
yield from new_extensions
|
|
874
|
+
yielded |= new_extensions
|
|
875
|
+
|
|
876
|
+
def all_extensions(self) -> LazyIterableSet[str]:
|
|
877
|
+
"""Returns the set of all possible extensions that this test or any of its descendants could match against"""
|
|
878
|
+
return LazyIterableSet(self._all_extensions())
|
|
879
|
+
|
|
880
|
+
@abstractmethod
|
|
881
|
+
def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
|
|
882
|
+
raise NotImplementedError()
|
|
883
|
+
|
|
884
|
+
def write(self, writer: ANSIWriter, is_current_test: bool = False, pre_mime_text: str = "") -> str:
|
|
885
|
+
for comment in self.comments:
|
|
886
|
+
if comment.source_info is not None and comment.source_info.original_line is not None:
|
|
887
|
+
writer.write(f" {comment.source_info.path.name}", dim=True, color=ANSIColor.CYAN)
|
|
888
|
+
writer.write(":", dim=True)
|
|
889
|
+
writer.write(f"{comment.source_info.line}\t", dim=True, color=ANSIColor.CYAN)
|
|
890
|
+
writer.write(comment.source_info.original_line.strip(), dim=True)
|
|
891
|
+
writer.write("\n")
|
|
892
|
+
else:
|
|
893
|
+
writer.write(f" # {comment!s}\n", dim=True)
|
|
894
|
+
if is_current_test:
|
|
895
|
+
writer.write("→ ", bold=True)
|
|
896
|
+
else:
|
|
897
|
+
writer.write(" ")
|
|
898
|
+
if self.source_info is not None and self.source_info.original_line is not None:
|
|
899
|
+
source_prefix = f"{self.source_info.path.name}:{self.source_info.line}"
|
|
900
|
+
indent = f"{' ' * len(source_prefix)}\t"
|
|
901
|
+
writer.write(self.source_info.path.name, dim=True, color=ANSIColor.CYAN)
|
|
902
|
+
writer.write(":", dim=True)
|
|
903
|
+
writer.write(self.source_info.line, dim=True, color=ANSIColor.CYAN)
|
|
904
|
+
writer.write("\t")
|
|
905
|
+
writer.write(self.source_info.original_line.strip(), color=ANSIColor.BLUE, bold=True)
|
|
906
|
+
else:
|
|
907
|
+
indent = ""
|
|
908
|
+
writer.write(f"{'>' * self.level}{self.offset!s}\t")
|
|
909
|
+
writer.write(self.message, color=ANSIColor.BLUE, bold=True)
|
|
910
|
+
if self.level == 0:
|
|
911
|
+
if self.test_type & TestType.BINARY:
|
|
912
|
+
writer.write(f" \uF5BB BINARY TEST", color=ANSIColor.BLUE)
|
|
913
|
+
elif self.test_type & TestType.TEXT:
|
|
914
|
+
writer.write(f" \uF5B9 ASCII TEST", color=ANSIColor.BLUE)
|
|
915
|
+
writer.write(pre_mime_text)
|
|
916
|
+
if self.mime is not None:
|
|
917
|
+
writer.write(f"\n {indent}!:mime ", dim=True)
|
|
918
|
+
writer.write(self.mime, color=ANSIColor.BLUE)
|
|
919
|
+
for e in self.extensions:
|
|
920
|
+
writer.write(f"\n {indent}!:ext ", dim=True)
|
|
921
|
+
writer.write(str(e), color=ANSIColor.BLUE)
|
|
922
|
+
writer.write("\n")
|
|
923
|
+
return f" {indent}"
|
|
924
|
+
|
|
925
|
+
def calculate_absolute_offset(self, data: bytes, parent_match: Optional[TestResult] = None) -> int:
|
|
926
|
+
return self.offset.to_absolute(data, parent_match)
|
|
927
|
+
|
|
928
|
+
def _match(self, context: MatchContext, parent_match: Optional[TestResult] = None) -> Iterator[MatchedTest]:
|
|
929
|
+
if context.only_match_mime and not self.can_match_mime:
|
|
930
|
+
return
|
|
931
|
+
try:
|
|
932
|
+
absolute_offset = self.calculate_absolute_offset(context.data, parent_match)
|
|
933
|
+
except InvalidOffsetError:
|
|
934
|
+
return
|
|
935
|
+
m = self.test(context.data, absolute_offset, parent_match)
|
|
936
|
+
if logging.root.level <= TRACE and (bool(m) or self.level > 0):
|
|
937
|
+
log.trace(
|
|
938
|
+
f"{self.source_info!s}\t{bool(m)}\t{absolute_offset}\t"
|
|
939
|
+
f"{context.data[absolute_offset:absolute_offset + 20]!r}"
|
|
940
|
+
)
|
|
941
|
+
if bool(m):
|
|
942
|
+
if not context.only_match_mime or self.mime is not None:
|
|
943
|
+
yield m
|
|
944
|
+
for child in self.children:
|
|
945
|
+
if not context.only_match_mime or child.can_match_mime:
|
|
946
|
+
yield from child._match(context=context, parent_match=m)
|
|
947
|
+
|
|
948
|
+
def match(self, to_match: Union[bytes, BinaryIO, str, Path, MatchContext]) -> Iterator[TestResult]:
    """Yields all matches for the given data"""
    if isinstance(to_match, bytes):
        # Raw bytes are wrapped directly.
        context = MatchContext(data=to_match)
    elif isinstance(to_match, MatchContext):
        context = to_match
    else:
        # Anything else is handed to MatchContext.load.
        context = MatchContext.load(to_match)
    return self._match(context)
|
|
955
|
+
|
|
956
|
+
def __str__(self):
    """Render the test as text, followed by its ``!:mime`` and ``!:ext`` lines.

    The first line is ``<file>:<line> <original text>`` when source information
    with an original line is available; otherwise it is rebuilt from the
    level, offset and message.
    """
    info = self.source_info
    if info is None or info.original_line is None:
        lines = [f"{'>' * self.level}{self.offset!s}\t{self.message}"]
    else:
        lines = [f"{info.path.name}:{info.line} {info.original_line.strip()}"]
    if self.mime is not None:
        lines.append(f"!:mime\t{self.mime}")
    lines.extend(f"!:ext\t{e}" for e in self.extensions)
    return "\n".join(lines)
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
class DynamicMagicTest(MagicTest, ABC):
    """A test that can be bound with a dynamically generated message"""

    def __init__(
            self,
            offset: Offset,
            mime: Optional[Union[str, TernaryExecutableMessage]] = None,
            extensions: Iterable[str] = (),
            default_message: Union[str, Message] = "",
            parent: Optional["MagicTest"] = None,
            comments: Iterable[Comment] = ()
    ):
        # The default message is stored by the base class as its regular message.
        super().__init__(
            offset=offset,
            mime=mime,
            extensions=extensions,
            parent=parent,
            comments=comments,
            message=default_message,
        )
        self._bound_message: Optional[Message] = None

    @property
    def default_message(self) -> Message:
        """The message supplied at construction time, as stored by the base class."""
        return super().message

    @property
    def message(self) -> Message:
        """The bound message if one has been set, otherwise the default message."""
        bound = self._bound_message
        return self.default_message if bound is None else bound

    def bind(self, message: Union[str, Message]) -> MagicTest:
        """Return a new test carrying ``message`` as its bound message.

        Strings are converted with ``Message.parse``.

        Raises:
            ValueError: if this test already has a bound message.
        """
        if self._bound_message is not None:
            raise ValueError(f"{self!r} already has a bound message: {self.message!s}")
        parsed = message if isinstance(message, Message) else Message.parse(message)
        # A subclass named "Bound<Class>" is created whose class attributes are a
        # snapshot of this instance's attributes.
        # NOTE(review): the subclass is instantiated without arguments; confirm that
        # the inherited __init__ tolerates this.
        bound_cls = type(f"Bound{self.__class__.__name__}", (self.__class__,), dict(self.__dict__))
        result: DynamicMagicTest = bound_cls()
        result._bound_message = parsed
        return result
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
# Registry of already-constructed data types, looked up by name.
TYPES_BY_NAME: Dict[str, "DataType"] = dict()


# Generic placeholder type variable.
T = TypeVar("T")
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
class DataTypeMatch:
    """Outcome of comparing data against a data type.

    An instance is truthy exactly when ``raw_match`` is not ``None``.
    ``DataTypeMatch.INVALID`` is a shared instance constructed with no arguments.
    """

    INVALID: "DataTypeMatch"

    def __init__(self, raw_match: Optional[bytes] = None, value: Optional[Any] = None, initial_offset: int = 0):
        self.raw_match: Optional[bytes] = raw_match
        # When no explicit value is given, the raw bytes double as the value.
        self.value: Optional[Any] = raw_match if value is None else value
        self.initial_offset: int = initial_offset

    def __bool__(self):
        return self.raw_match is not None

    def __repr__(self):
        # The offset is only shown when it differs from the default of 0.
        suffix = f", initial_offset={self.initial_offset}" if self.initial_offset != 0 else ""
        return f"{self.__class__.__name__}(raw_match={self.raw_match!r}, value={self.value!r}{suffix})"

    def __str__(self):
        if self.value is not None:
            return str(self.value)
        if self.raw_match is None:
            return "DataTypeNoMatch"
        return repr(self.raw_match)


DataTypeMatch.INVALID = DataTypeMatch()
|
|
1042
|
+
|
|
1043
|
+
|
|
1044
|
+
class DataType(ABC, Generic[T]):
    """Abstract base for the value types a test can compare data against.

    ``T`` is the type produced by :meth:`parse_expected` and consumed by
    :meth:`match`.
    """

    def __init__(self, name: str):
        self.name: str = name

    def allows_invalid_offsets(self, expected: T) -> bool:
        """Return ``False``; subclasses may override."""
        return False

    @abstractmethod
    def is_text(self, value: T) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def parse_expected(self, specification: str) -> T:
        raise NotImplementedError()

    @abstractmethod
    def match(self, data: bytes, expected: T) -> DataTypeMatch:
        raise NotImplementedError()

    @staticmethod
    def parse(fmt: str) -> "DataType":
        """Return the data type described by ``fmt``, reusing cached instances.

        Results are cached in ``TYPES_BY_NAME`` under both ``fmt`` and the
        type's own ``name``.
        """
        try:
            return TYPES_BY_NAME[fmt]
        except KeyError:
            pass
        if fmt.startswith(("string", "ustring")):
            dt = StringType.parse(fmt)
        elif fmt == "lestring16":
            dt = UTF16Type(endianness=Endianness.LITTLE)
        elif fmt == "bestring16":
            dt = UTF16Type(endianness=Endianness.BIG)
        elif fmt.startswith("pstring"):
            dt = PascalStringType.parse(fmt)
        elif fmt.startswith("search"):
            dt = SearchType.parse(fmt)
        elif fmt.startswith("regex"):
            dt = RegexType.parse(fmt)
        elif fmt == "guid":
            dt = GUIDType()
        else:
            # Everything that is not a recognised string-like type is treated as numeric.
            dt = NumericDataType.parse(fmt)
        # Sometimes a data type will change its name based on modifiers.
        # For example, string and pstring will always include their modifiers after their name,
        # so prefer an instance that is already registered under the canonical name.
        dt = TYPES_BY_NAME.setdefault(dt.name, dt)
        TYPES_BY_NAME[fmt] = dt
        return dt

    def __str__(self):
        return self.name

    def __repr__(self):
        return f"{self.__class__.__name__}({self.name})"
|
|
1097
|
+
|
|
1098
|
+
|
|
1099
|
+
class UUIDWildcard:
    """Empty marker class carrying no state or behavior of its own."""
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
class GUIDType(DataType[Union[UUID, UUIDWildcard]]):
    """The ``guid`` data type: the first 16 bytes of the data read as a little-endian UUID."""

    def __init__(self):
        super().__init__("guid")

    def is_text(self, value: Union[UUID, UUIDWildcard]) -> bool:
        return False

    def parse_expected(self, specification: str) -> Union[UUID, UUIDWildcard]:
        """Parse the expected GUID; a bare ``x`` yields a wildcard."""
        spec = specification.strip()
        if spec == "x":
            return UUIDWildcard()
        # there is a bug in the `asf` definition where a guid is missing its last two characters:
        if spec.upper() == "B61BE100-5B4E-11CF-A8FD-00805F5C44":
            spec = "B61BE100-5B4E-11CF-A8FD-00805F5C442B"
        return UUID(spec)

    def match(self, data: bytes, expected: Union[UUID, UUIDWildcard]) -> DataTypeMatch:
        """Match the leading 16 bytes of ``data`` against ``expected``."""
        if len(data) < 16:
            return DataTypeMatch.INVALID
        raw = data[:16]
        try:
            uuid = UUID(bytes_le=raw)
        except ValueError:
            return DataTypeMatch.INVALID
        accepted = isinstance(expected, UUIDWildcard) or uuid == expected
        if not accepted:
            return DataTypeMatch.INVALID
        return DataTypeMatch(raw, uuid)
|
|
1129
|
+
|
|
1130
|
+
|
|
1131
|
+
class UTF16Type(DataType[bytes]):
    """The ``lestring16`` / ``bestring16`` data types: a UTF-16 encoded string prefix."""

    def __init__(self, endianness: Endianness):
        if endianness == Endianness.LITTLE:
            type_name = "lestring16"
        elif endianness == Endianness.BIG:
            type_name = "bestring16"
        else:
            raise ValueError(f"UTF16 strings only support big and little endianness, not {endianness!r}")
        super().__init__(type_name)
        self.endianness: Endianness = endianness

    def _codec(self) -> str:
        """Name of the Python codec corresponding to this type's endianness."""
        return "utf-16-le" if self.endianness == Endianness.LITTLE else "utf-16-be"

    def is_text(self, value: bytes) -> bool:
        return True

    def parse_expected(self, specification: str) -> bytes:
        """Unescape the specification and encode it as UTF-16 in this type's byte order."""
        text = unescape(specification).decode("utf-8")
        return text.encode(self._codec())

    def match(self, data: bytes, expected: bytes) -> DataTypeMatch:
        """Succeed when ``data`` begins with the encoded ``expected`` bytes."""
        if not data.startswith(expected):
            return DataTypeMatch.INVALID
        return DataTypeMatch(expected, expected.decode(self._codec()))
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
class StringTest(ABC):
    """Abstract base for the expected-value side of string comparisons.

    Concrete subclasses implement anchored matching (:meth:`matches`) and
    unanchored searching (:meth:`search`).
    """

    def __init__(self, trim: bool = False, compact_whitespace: bool = False, num_bytes: Optional[int] = None):
        self.trim: bool = trim
        self.compact_whitespace: bool = compact_whitespace
        self.num_bytes: Optional[int] = num_bytes

    def post_process(self, data: bytes, initial_offset: int = 0) -> DataTypeMatch:
        """Wrap matched bytes in a ``DataTypeMatch``.

        The reported value is ``data`` (stripped when ``trim`` is set), decoded
        as UTF-8 when possible and left as bytes otherwise. The raw match is
        always the unmodified ``data``.
        """
        # NOTE: compact_whitespace is intentionally not applied to the reported value here.
        value = data.strip() if self.trim else data
        try:
            value = value.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: keep the value as bytes.
            pass
        return DataTypeMatch(data, value, initial_offset=initial_offset)

    @abstractmethod
    def matches(self, data: bytes) -> DataTypeMatch:
        raise NotImplementedError()

    @abstractmethod
    def is_always_text(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def search(self, data: bytes) -> DataTypeMatch:
        raise NotImplementedError()

    @staticmethod
    def parse(specification: str,
              trim: bool = False,
              compact_whitespace: bool = False,
              case_insensitive_lower: bool = False,
              case_insensitive_upper: bool = False,
              optional_blanks: bool = False,
              full_word_match: bool = False,
              num_bytes: Optional[int] = None) -> "StringTest":
        """Build the string test described by ``specification``.

        * ``x`` produces a wildcard;
        * a leading ``!`` negates the remaining test;
        * a leading ``>`` or ``<`` produces a comparison test;
        * anything else (optionally prefixed with ``=``) is an explicit string match.

        Raises:
            ValueError: if ``num_bytes`` is combined with an explicit string match.
        """
        original_spec = specification
        if specification.strip() == "x":
            return StringWildcard(trim=trim, compact_whitespace=compact_whitespace, num_bytes=num_bytes)

        negate = specification.startswith("!")
        if negate:
            specification = specification[1:]

        if specification.startswith((">", "<")):
            test: StringTest = StringLengthTest(
                to_match=specification[1:],
                test_smaller=specification.startswith("<"),
                trim=trim,
                compact_whitespace=compact_whitespace,
                num_bytes=num_bytes,
            )
        elif num_bytes is not None:
            raise ValueError(f"Invalid string match specification: {original_spec!r}: a string length limiter "
                             f"cannot be combined with an explicit string match")
        else:
            if specification.startswith("="):
                specification = specification[1:]
            test = StringMatch(
                to_match=specification,
                trim=trim,
                compact_whitespace=compact_whitespace,
                case_insensitive_lower=case_insensitive_lower,
                case_insensitive_upper=case_insensitive_upper,
                optional_blanks=optional_blanks,
                full_word_match=full_word_match
            )

        return NegatedStringTest(test) if negate else test
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
class StringWildcard(StringTest):
    """Accepts any data, reporting the bytes up to the first NUL (bounded by ``num_bytes``)."""

    def matches(self, data: bytes) -> DataTypeMatch:
        """Return the leading NUL-terminated run of ``data``.

        Without a NUL terminator the whole of ``data`` is used, truncated to
        ``num_bytes`` when that limit is set.
        """
        limit = self.num_bytes
        if limit is None:
            terminator = data.find(b"\0")
            if terminator < 0:
                return self.post_process(data)
        else:
            # Only look for the terminator inside the first `limit` bytes.
            terminator = data.find(b"\0", 0, limit)
            if terminator < 0:
                return self.post_process(data[:limit])
        return self.post_process(data[:terminator])

    def is_always_text(self) -> bool:
        return False

    def search(self, data: bytes) -> DataTypeMatch:
        # Searching is the same as matching at the start of the data.
        return self.matches(data)

    def __str__(self):
        return "null-terminated string"
|
|
1258
|
+
|
|
1259
|
+
|
|
1260
|
+
class NegatedStringTest(StringWildcard):
    """Inverts another string test.

    When the wrapped test fails, the data is reported the way a
    ``StringWildcard`` would report it; when the wrapped test succeeds, the
    negated test fails.
    """

    def __init__(self, parent_test: StringTest):
        # Carry over the wrapped test's byte limit as well, so the wildcard fallback
        # reads no further than the wrapped test itself would.
        super().__init__(
            trim=parent_test.trim,
            compact_whitespace=parent_test.compact_whitespace,
            num_bytes=parent_test.num_bytes,
        )
        self.parent: StringTest = parent_test

    def is_always_text(self) -> bool:
        return self.parent.is_always_text()

    def matches(self, data: bytes) -> DataTypeMatch:
        """Succeed (as a wildcard) only if the wrapped test does not match ``data``."""
        if not self.parent.matches(data):
            return super().matches(data)
        return DataTypeMatch.INVALID

    def search(self, data: bytes) -> DataTypeMatch:
        """Succeed (as a wildcard) only if the wrapped test finds nothing in ``data``."""
        if not self.parent.search(data):
            return super().search(data)
        return DataTypeMatch.INVALID

    def __str__(self):
        return f"something other than {self.parent!s}"
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
class StringLengthTest(StringWildcard):
    """The ``>`` / ``<`` string tests.

    The NUL-terminated string read from the data is compared byte-wise with
    the expected text: ``<`` succeeds when the data sorts before it, ``>``
    when it sorts after it.
    """

    def __init__(self, to_match: str, test_smaller: bool, trim: bool = False, compact_whitespace: bool = False,
                 num_bytes: Optional[int] = None):
        super().__init__(trim=trim, compact_whitespace=compact_whitespace, num_bytes=num_bytes)
        self.raw_pattern: str = to_match
        self.to_match: bytes = unescape(to_match)
        # Anything after an embedded NUL in the expected text is ignored.
        null_termination_index = self.to_match.find(0)
        if null_termination_index >= 0:
            self.to_match = self.to_match[:null_termination_index]
        self.desired_length: int = len(self.to_match)
        self.test_smaller: bool = test_smaller

    def matches(self, data: bytes) -> DataTypeMatch:
        """Compare the first ``desired_length`` bytes of the string in ``data`` with the expected text.

        An empty expected text accepts any string.
        """
        match = super().matches(data)
        if self.desired_length == 0:
            return match
        prefix = match.raw_match[:self.desired_length]
        if self.test_smaller:
            passed = prefix < self.to_match
        else:
            passed = prefix > self.to_match
        return match if passed else DataTypeMatch.INVALID

    def is_always_text(self) -> bool:
        return False

    def search(self, data: bytes) -> DataTypeMatch:
        """Like :meth:`matches`, but additionally compares the complete string."""
        # StringWildcard.search dispatches back to self.matches(), so the result may be
        # DataTypeMatch.INVALID, whose raw_match is None. Comparing None with bytes
        # raises TypeError, so a failed match must be returned before comparing.
        match = super().search(data)
        if not match:
            return DataTypeMatch.INVALID
        if self.test_smaller:
            passed = match.raw_match < self.to_match
        else:
            passed = match.raw_match > self.to_match
        return match if passed else DataTypeMatch.INVALID

    def __repr__(self):
        return f"{self.__class__.__name__}(to_match={self.raw_pattern!r}, test_smaller={self.test_smaller!r}, " \
               f"trim={self.trim!r}, compact_whitespace={self.compact_whitespace!r}, num_bytes={self.num_bytes!r})"

    def __str__(self):
        return f"{['>', '<'][self.test_smaller]}{repr(self.to_match)}"
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
class StringMatch(StringTest):
    """Explicit string comparison, implemented by compiling the expected text into a bytes regex."""

    def __init__(self,
                 to_match: str,
                 trim: bool = False,
                 compact_whitespace: bool = False,
                 case_insensitive_lower: bool = False,
                 case_insensitive_upper: bool = False,
                 optional_blanks: bool = False,
                 full_word_match: bool = False
                 ):
        super().__init__(trim=trim, compact_whitespace=compact_whitespace)
        self.raw_pattern: str = to_match
        self.string: bytes = unescape(to_match)
        self.case_insensitive_lower: bool = case_insensitive_lower
        self.case_insensitive_upper: bool = case_insensitive_upper
        self.optional_blanks: bool = optional_blanks
        self.full_word_match: bool = full_word_match
        if optional_blanks and compact_whitespace:
            raise ValueError("Optional blanks `w` and compacting whitespace `W` cannot be selected at the same time")
        self._is_always_text: Optional[bool] = None
        self._pattern: Optional[re.Pattern] = None
        # Compile eagerly so that an invalid pattern fails at construction time.
        _ = self.pattern

    def _fold_case(self, pattern: bytes, first: str, partner: str) -> bytes:
        """Replace each of the 26 letters starting at ``first`` with a two-letter class.

        The class contains the letter itself followed by its counterpart from
        the alphabet starting at ``partner``.
        """
        base, other = ord(first), ord(partner)
        for step in range(26):
            letter = base + step
            pattern = pattern.replace(bytes([letter]), f"[{chr(letter)}{chr(other + step)}]".encode("utf-8"))
        return pattern

    def _compact_whitespace_pattern(self, pattern: bytes) -> bytes:
        """Collapse runs of identical pattern tokens into counted repetitions.

        A token is either a single byte or a backslash plus the byte after it.
        A run of ``n`` whitespace tokens must match at least ``n`` times; a run
        of any other token must match exactly ``n`` times.
        """
        runs: List[Tuple[bytes, int]] = []
        position = 0
        while position < len(pattern):
            if pattern[position:position + 1] == b"\\":
                if position + 1 >= len(pattern):
                    # A trailing backslash escapes nothing.
                    raise ValueError(f"Error parsing search pattern {self.string!r}")
                token = pattern[position:position + 2]
            else:
                token = pattern[position:position + 1]
            position += len(token)
            if runs and runs[-1][0] == token:
                runs[-1] = (token, runs[-1][1] + 1)
            else:
                runs.append((token, 1))

        compacted = bytearray()
        for token, count in runs:
            compacted += token
            if token in (b'\\ ', b'\\s', b'\\t', b'\\r', b'\\v', b'\\f'):
                # this is whitespace
                compacted += b"+" if count == 1 else f"{{{count},}}".encode("utf-8")
            elif count > 1:
                compacted += f"{{{count}}}".encode("utf-8")
        return bytes(compacted)

    def pattern_string(self) -> bytes:
        """Build the regex source for this match from the escaped expected bytes."""
        pattern = re.escape(self.string)
        if self.case_insensitive_lower and not self.case_insensitive_upper:
            # treat lower case letters as either lower or upper case
            pattern = self._fold_case(pattern, 'a', 'A')
        elif not self.case_insensitive_lower and self.case_insensitive_upper:
            # treat upper case letters as either lower or upper case
            pattern = self._fold_case(pattern, 'A', 'a')
        if self.compact_whitespace:
            pattern = self._compact_whitespace_pattern(pattern)
        elif self.optional_blanks:
            pattern = pattern.replace(rb"\ ", rb"\ ?")
        if self.full_word_match:
            pattern = rb"\b" + pattern + rb"\b"
        return pattern

    def pattern_flags(self) -> int:
        """Regex flags: full case-insensitivity only when both case options are set."""
        if self.case_insensitive_upper and self.case_insensitive_lower:
            return 0 | re.IGNORECASE
        return 0

    @property
    def pattern(self) -> re.Pattern:
        """The compiled regex, built on first access and cached afterwards."""
        if self._pattern is None:
            self._pattern = re.compile(self.pattern_string(), flags=self.pattern_flags())
        return self._pattern

    def is_always_text(self) -> bool:
        """Whether the expected string is plain ASCII text (result is cached)."""
        if self._is_always_text is None:
            if "\\x" in self.raw_pattern or "\\0" in self.raw_pattern:
                # the string has hex escapes, so do not treat it as text
                self._is_always_text = False
            else:
                try:
                    _ = self.pattern.pattern.decode("ascii")
                except UnicodeDecodeError:
                    self._is_always_text = False
                else:
                    self._is_always_text = True
        return self._is_always_text

    def matches(self, data: bytes) -> DataTypeMatch:
        """Match the pattern at the very start of ``data``."""
        found = self.pattern.match(data)
        if not found:
            return DataTypeMatch.INVALID
        return self.post_process(bytes(found.group(0)))

    def search(self, data: bytes) -> DataTypeMatch:
        """Find the first occurrence of the pattern anywhere in ``data``."""
        found = self.pattern.search(data)
        if not found:
            return DataTypeMatch.INVALID
        return self.post_process(bytes(found.group(0)), initial_offset=found.start())

    def __str__(self):
        return repr(self.string)
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
class StringType(DataType[StringTest]):
    """The ``string`` data type together with its optional length limit and modifier flags.

    The type's ``name`` encodes every active modifier, e.g. ``string/16/Wc``.
    ``DataType.parse`` caches types by that name, so two differently configured
    types must never share a name.
    """

    def __init__(
            self,
            case_insensitive_lower: bool = False,
            case_insensitive_upper: bool = False,
            compact_whitespace: bool = False,
            optional_blanks: bool = False,
            full_word_match: bool = False,
            trim: bool = False,
            force_text: bool = False,
            num_bytes: Optional[int] = None
    ):
        # Flags appear in the name in this fixed order.
        modifiers = "".join(
            flag for enabled, flag in (
                (compact_whitespace, "W"),
                (optional_blanks, "w"),
                (case_insensitive_upper, "C"),
                (case_insensitive_lower, "c"),
                (trim, "T"),
                (full_word_match, "f"),
                (force_text, "t"),
            ) if enabled
        )
        # full_word_match counts as a modifier too: previously `string/f` was named
        # plain "string" and therefore collided with the unmodified type in the cache.
        if num_bytes is None and not modifiers:
            name = "string"
        else:
            length_part = "" if num_bytes is None else f"{num_bytes}/"
            name = f"string/{length_part}{modifiers}"
        super().__init__(name)
        self.case_insensitive_lower: bool = case_insensitive_lower
        self.case_insensitive_upper: bool = case_insensitive_upper
        self.compact_whitespace: bool = compact_whitespace
        self.optional_blanks: bool = optional_blanks
        self.full_word_match: bool = full_word_match
        self.trim: bool = trim
        self.force_text: bool = force_text
        self.num_bytes: Optional[int] = num_bytes

    def is_text(self, value: StringTest) -> bool:
        return self.force_text

    def allows_invalid_offsets(self, expected: StringTest) -> bool:
        return isinstance(expected, NegatedStringTest)

    def parse_expected(self, specification: str) -> StringTest:
        """Build the ``StringTest`` for ``specification`` using this type's modifiers."""
        return StringTest.parse(
            specification,
            trim=self.trim,
            case_insensitive_lower=self.case_insensitive_lower,
            case_insensitive_upper=self.case_insensitive_upper,
            compact_whitespace=self.compact_whitespace,
            # The `w` flag used to be dropped here. StringMatch rejects `w` combined
            # with `W`, so when both are set `W` keeps precedence, as it effectively
            # had before, instead of raising.
            optional_blanks=self.optional_blanks and not self.compact_whitespace,
            full_word_match=self.full_word_match,
            num_bytes=self.num_bytes
        )

    def match(self, data: bytes, expected: StringTest) -> DataTypeMatch:
        return expected.matches(data)

    STRING_TYPE_FORMAT: Pattern[str] = re.compile(r"^u?string(/(?P<numbytes>\d+))?(?P<opts>/[BbCctTWwf]*)?$")

    @classmethod
    def parse(cls, format_str: str) -> "StringType":
        """Parse a type declaration such as ``string``, ``string/16`` or ``ustring/cW``.

        Raises:
            ValueError: if ``format_str`` is not a valid string type declaration.
        """
        m = cls.STRING_TYPE_FORMAT.match(format_str)
        if not m:
            raise ValueError(f"Invalid string type declaration: {format_str!r}")
        raw_length = m.group("numbytes")
        num_bytes: Optional[int] = None if raw_length is None else int(raw_length)
        raw_options = m.group("opts")
        options: Iterable[str] = () if raw_options is None else raw_options
        unsupported_options = {opt for opt in options if opt not in "/WwcCtbTf"}
        if unsupported_options:
            log.warning(f"{format_str!r} has invalid option(s) that will be ignored: {', '.join(unsupported_options)}")
        return StringType(
            case_insensitive_lower="c" in options,
            case_insensitive_upper="C" in options,
            compact_whitespace="W" in options,
            optional_blanks="w" in options,
            full_word_match="f" in options,
            trim="T" in options,
            force_text="t" in options,
            num_bytes=num_bytes
        )
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
class SearchType(StringType):
    """The ``search`` data type: a string type whose test is searched for rather than anchored."""

    def __init__(
            self,
            repetitions: Optional[int] = None,
            case_insensitive_lower: bool = False,
            case_insensitive_upper: bool = False,
            compact_whitespace: bool = False,
            optional_blanks: bool = False,
            match_to_start: bool = False,
            full_word_match: bool = False,
            trim: bool = False
    ):
        if repetitions is not None and repetitions <= 0:
            raise ValueError("repetitions must be either None or a positive integer")
        super().__init__(
            case_insensitive_lower=case_insensitive_lower,
            case_insensitive_upper=case_insensitive_upper,
            compact_whitespace=compact_whitespace,
            optional_blanks=optional_blanks,
            full_word_match=full_word_match,
            trim=trim
        )
        self.repetitions: Optional[int] = repetitions
        self.match_to_start: bool = match_to_start

        # Derive the name from the one computed by StringType: swap the "string" prefix
        # for "search", insert the repetition count, and append the `s` flag if requested.
        assert self.name.startswith("string")
        rep_str = "" if repetitions is None else f"/{repetitions}"
        modifiers = self.name[6:]
        self.name = f"search{rep_str}{modifiers}"
        if match_to_start:
            # With no other modifiers present, the flag needs its own separator.
            self.name = f"{self.name}s" if modifiers else f"search{rep_str}/s"

    def is_text(self, value: StringTest) -> bool:
        return value.is_always_text()

    def match(self, data: bytes, expected: StringTest) -> DataTypeMatch:
        return expected.search(data)

    SEARCH_TYPE_FORMAT: Pattern[str] = re.compile(
        r"^search"
        r"((/(?P<repetitions1>(0[xX][\dA-Fa-f]+|\d+)))(/(?P<flags1>[BbCctTWwsf]*)?)?|"
        r"/((?P<flags2>[BbCctTWwsf]*)/?)?(?P<repetitions2>(0[xX][\dA-Fa-f]+|\d+)))$"
    )
    # NOTE: some specification files like `ber` use `search/b64`, which is undocumented. We treat that equivalent to
    #       the compliant `search/b/64`.
    # TODO: Figure out if this is correct.

    @classmethod
    def parse(cls, format_str: str) -> "SearchType":
        """Parse a declaration such as ``search``, ``search/256/c`` or ``search/b64``.

        Raises:
            ValueError: if ``format_str`` is not a valid search type declaration.
        """
        if format_str == "search":
            # it's undocumented, but you can apparently use the search test without an explicit repetition number
            return SearchType()
        m = cls.SEARCH_TYPE_FORMAT.match(format_str)
        if not m:
            raise ValueError(f"Invalid search type declaration: {format_str!r}")
        # The count may come before the flags (first alternative) or after them (second).
        for count_group, flag_group in (("repetitions1", "flags1"), ("repetitions2", "flags2")):
            raw_count = m.group(count_group)
            if raw_count is not None:
                repetitions = parse_numeric(raw_count)
                flags = m.group(flag_group)
                break
        else:
            raise ValueError(f"Invalid search type declaration: {format_str!r}")
        options: Iterable[str] = () if flags is None else flags
        return SearchType(
            repetitions=repetitions,
            case_insensitive_lower="c" in options,
            case_insensitive_upper="C" in options,
            compact_whitespace="B" in options or "W" in options,
            optional_blanks="b" in options or "w" in options,
            full_word_match="f" in options,
            trim="T" in options,
            match_to_start="s" in options
        )
|
|
1603
|
+
|
|
1604
|
+
|
|
1605
|
+
class PascalStringType(DataType[StringTest]):
    """The ``pstring`` data type: a string preceded by a 1, 2 or 4 byte length field.

    Modifier letters: ``B`` (1 byte), ``H``/``h`` (2 bytes big/little endian),
    ``L``/``l`` (4 bytes big/little endian), and ``J`` (the stored length also
    counts the length field itself).
    """

    def __init__(
            self,
            byte_length: int = 1,
            endianness: Endianness = Endianness.BIG,
            count_includes_length: bool = False
    ):
        if endianness != Endianness.BIG and endianness != Endianness.LITTLE:
            raise ValueError("Endianness must be either BIG or LITTLE")
        big = endianness == Endianness.BIG
        if byte_length == 1:
            modifier = "B"
        elif byte_length == 2:
            modifier = "H" if big else "h"
        elif byte_length == 4:
            modifier = "L" if big else "l"
        else:
            raise ValueError("byte_length must be either 1, 2, or 4")
        if count_includes_length:
            modifier = f"{modifier}J"
        super().__init__(f"pstring/{modifier}")
        self.byte_length: int = byte_length
        self.endianness: Endianness = endianness
        self.count_includes_length: bool = count_includes_length

    def is_text(self, value: StringTest) -> bool:
        # TODO: See if Pascal strings should sometimes be forced to be text
        return False

    def parse_expected(self, specification: str) -> StringTest:
        return StringTest.parse(specification)

    def match(self, data: bytes, expected: StringTest) -> DataTypeMatch:
        """Read the length prefix, then match ``expected`` against exactly that many bytes.

        On success the returned match's ``raw_match`` covers the length prefix
        as well as the string payload.
        """
        prefix_size = self.byte_length
        if len(data) < prefix_size:
            return DataTypeMatch.INVALID
        if prefix_size == 1:
            length = data[0]
        else:
            byte_order = ">" if self.endianness == Endianness.BIG else "<"
            if prefix_size == 2:
                length = struct.unpack(f"{byte_order}H", data[:2])[0]
            else:
                length = struct.unpack(f"{byte_order}I", data[:4])[0]
        if self.count_includes_length:
            length -= prefix_size
            if length < 0:
                # The stored length claims to include the prefix yet is smaller than the
                # prefix itself; there is no valid payload to slice.
                return DataTypeMatch.INVALID
        end = prefix_size + length
        if len(data) < end:
            return DataTypeMatch.INVALID
        m = expected.matches(data[prefix_size:end])
        if m:
            m.raw_match = data[:end]
        return m

    PSTRING_TYPE_FORMAT: Pattern[str] = re.compile(r"^pstring(/J?[BHhLl]?J?)?$")

    @classmethod
    def parse(cls, format_str: str) -> "PascalStringType":
        """Parse a declaration such as ``pstring``, ``pstring/H`` or ``pstring/lJ``.

        Raises:
            ValueError: if ``format_str`` is not a valid pstring type declaration.
        """
        m = cls.PSTRING_TYPE_FORMAT.match(format_str)
        if not m:
            raise ValueError(f"Invalid pstring type declaration: {format_str!r}")
        options: Iterable[str] = () if m.group(1) is None else m.group(1)
        # Without a width letter the prefix is a single byte.
        byte_length, endianness = 1, Endianness.BIG
        for letter, width, order in (
                ("H", 2, Endianness.BIG),
                ("h", 2, Endianness.LITTLE),
                ("L", 4, Endianness.BIG),
                ("l", 4, Endianness.LITTLE),
        ):
            if letter in options:
                byte_length, endianness = width, order
                break
        return PascalStringType(
            byte_length=byte_length,
            endianness=endianness,
            count_includes_length="J" in options
        )
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
def posix_to_python_re(match: bytes) -> bytes:
    """Rewrite POSIX character classes (``[:alpha:]``, ``[:digit:]``, ...) into Python ``re`` syntax.

    Each ``[:name:]`` token is replaced with the equivalent contents of a Python character set, so
    the token is expected to appear inside a bracket expression, e.g. ``[[:digit:]]`` -> ``[0-9]``.
    The ``graph`` and ``print`` replacements start with ``^`` and are therefore only correct when
    the class is the first item of its bracket expression.

    :param match: the regular expression, as bytes, possibly containing POSIX classes
    :return: the expression with every known POSIX class substituted
    """
    for class_name, replacement in (
        (b"upper", b"A-Z"),
        (b"lower", b"a-z"),
        (b"alpha", b"A-Za-z"),
        (b"digit", b"0-9"),
        (b"xdigit", b"0-9A-Fa-f"),
        (b"alnum", b"A-Za-z0-9"),
        # the hyphen must be escaped: a bare "+-=" would be the range 0x2B-0x3D, which contains the digits
        (b"punct", b",./<>?`;':\"\\[\\]{}\\|~!@#$%\\^&*()_+\\-=\\\\"),
        (b"blank", b" \t"),
        (b"space", b" \t\n\r\f\v"),
        (b"cntrl", b"\0-\x1f\x7f"),
        (b"graph", b"^\0-\x1f\x7f "),
        (b"print", b"^\0-\x1f\x7f"),
        (b"word", b"\\w"),
    ):
        match = match.replace(b"[:" + class_name + b":]", replacement)
    return match
|
|
1716
|
+
|
|
1717
|
+
|
|
1718
|
+
class RegexType(DataType[Pattern[bytes]]):
    """The magic ``regex`` data type: tests the input against a compiled bytes regular expression.

    ``length`` bounds how much input is examined: a number of bytes by default, or a number of
    lines when ``limit_lines`` is set (with an implicit budget of 80 bytes per line).
    """

    def __init__(
            self,
            length: Optional[int] = None,
            case_insensitive: bool = False,
            match_to_start: bool = False,
            limit_lines: bool = False,
            trim: bool = False
    ):
        if length is None:
            if limit_lines:
                length = 8 * 1024 // 80  # libmagic assumes 80 bytes per line
            else:
                length = 8 * 1024  # libmagic limits to 8KiB by default
        self.limit_lines: bool = limit_lines
        self.length: int = length
        self.case_insensitive: bool = case_insensitive
        self.match_to_start: bool = match_to_start
        self.trim: bool = trim
        super().__init__(f"regex/{self.length}{['', 'c'][case_insensitive]}{['', 's'][match_to_start]}"
                         f"{['', 'l'][self.limit_lines]}{['', 'T'][self.trim]}")

    DOLLAR_PATTERN = re.compile(rb"(^|[^\\])\$", re.MULTILINE)

    def is_text(self, value: Pattern[bytes]) -> bool:
        """Return True if the pattern's source is pure ASCII."""
        try:
            _ = value.pattern.decode("ascii")
            return True
        except UnicodeDecodeError:
            return False

    def parse_expected(self, specification: str) -> Pattern[bytes]:
        """Compile the expected value of a regex test.

        :raises ValueError: if the specification is not a valid regular expression
        """
        # handle POSIX-style character classes:
        unescaped_spec = posix_to_python_re(unescape(specification))
        # NOTE: rewriting '$' to '[\r$]' via DOLLAR_PATTERN is currently disabled
        flags = re.MULTILINE
        if self.case_insensitive:
            flags |= re.IGNORECASE
        try:
            return re.compile(unescaped_spec, flags)
        except re.error as e:
            # keep the original error attached for debugging
            raise ValueError(str(e)) from e

    def _build_match(self, raw: bytes) -> DataTypeMatch:
        """Wrap the matched bytes, decoding them as UTF-8 when possible and trimming if requested."""
        try:
            value: Union[str, bytes] = raw.decode("utf-8")
        except UnicodeDecodeError:
            value = raw
        if self.trim:
            value = value.strip()
        return DataTypeMatch(raw, value)

    def match(self, data: bytes, expected: Pattern[bytes]) -> DataTypeMatch:
        """Test ``data`` against ``expected``.

        Without ``limit_lines`` the pattern is searched for in the first ``self.length`` bytes.
        With ``limit_lines`` the pattern is matched against the beginning of each of the first
        ``self.length`` lines.  In both cases the raw match runs from the start of ``data`` to the
        end of the regex match.

        :return: the match, or ``DataTypeMatch.INVALID`` if nothing matched
        """
        if not self.limit_lines:
            m = expected.search(data[:self.length])
            if m:
                return self._build_match(data[:m.end()])
            return DataTypeMatch.INVALID
        byte_limit = 80 * self.length  # libmagic uses an implicit byte limit assuming 80 characters per line
        offset = 0
        for _ in range(self.length):
            line_end = data.find(b"\n", offset, byte_limit)
            is_last_line = line_end < 0
            if is_last_line:
                # libmagic searches up to the end of the buffer when it runs out of line terminators,
                # so an unterminated final line must still be tested
                line_end = min(len(data), byte_limit)
                if line_end <= offset:
                    break
            m = expected.match(data[offset:line_end])
            if m:
                return self._build_match(data[:offset + m.end()])
            if is_last_line:
                break
            offset = line_end + 1
        # no line matched within the limit; always return a DataTypeMatch rather than falling off the end
        return DataTypeMatch.INVALID

    REGEX_TYPE_FORMAT: Pattern[str] = re.compile(r"^regex(/(?P<length>\d+)?(?P<flags>[cslT]*)(b\d*)?)?$")
    # NOTE: some specification files like `cad` use `regex/b`, which is undocumented, and it's unclear from the libmagic
    #       source code whether it is simply ignored or if it has a purpose. We ignore it here.

    @classmethod
    def parse(cls, format_str: str) -> "RegexType":
        """Build a ``RegexType`` from a ``regex[/<length>][cslT]`` type declaration.

        :raises ValueError: if ``format_str`` is not a valid regex declaration
        """
        m = cls.REGEX_TYPE_FORMAT.match(format_str)
        if not m:
            raise ValueError(f"Invalid regex type declaration: {format_str!r}")
        options: str = m.group("flags") or ""
        length: Optional[int] = None if m.group("length") is None else int(m.group("length"))
        return RegexType(
            length=length,
            case_insensitive="c" in options,
            match_to_start="s" in options,
            limit_lines="l" in options,
            trim="T" in options
        )
|
|
1822
|
+
|
|
1823
|
+
|
|
1824
|
+
# Registry from a base numeric type's magic name (e.g. "byte", "long", "4") to its enum member.
# It is filled in as a side effect of BaseNumericDataType.__init__ and read by NumericDataType.parse.
BASE_NUMERIC_TYPES_BY_NAME: Dict[str, "BaseNumericDataType"] = {}
|
|
1825
|
+
|
|
1826
|
+
|
|
1827
|
+
# strftime formats used when rendering date/time values found in files
DATETIME_FORMAT: str = "%a %b %e %H:%M:%S %Y"
DATE_FORMAT: str = "%a %b %e %Y"
TIME_FORMAT: str = "%H:%M:%S"


def local_date(ms_since_epoch: int) -> str:
    """Format a timestamp, given in milliseconds since the Unix epoch, in the local time zone."""
    return strftime(DATETIME_FORMAT, localtime(ms_since_epoch / 1000.0))


def utc_date(ms_since_epoch: int) -> str:
    """Format a timestamp, given in milliseconds since the Unix epoch, in UTC."""
    return strftime(DATETIME_FORMAT, gmtime(ms_since_epoch / 1000.0))


def msdos_date(value: int) -> str:
    """Format a 16-bit MS-DOS packed date.

    Layout: bits 0-4 are the day of the month (1-31), bits 5-8 the month (1-12) and bits 9-15 the
    number of years since 1980.  Day and month are already one-based, so they are used as stored;
    a zero field (not a valid date) is clamped to 1 rather than rejected.

    :raises ValueError: if the fields do not form a valid calendar date (e.g. month 13)
    """
    day = max(value & 0b11111, 1)
    month = max((value >> 5) & 0b1111, 1)
    year = 1980 + ((value >> 9) & 0b1111111)
    return strftime(DATE_FORMAT, datetime(year, month, day).timetuple())


def msdos_time(value: int) -> str:
    """Format a 16-bit MS-DOS packed time.

    Layout: bits 0-4 hold the seconds divided by two, bits 5-10 the minutes and bits 11-15 the hour.

    :raises ValueError: if the fields do not form a valid time of day
    """
    seconds = (value & 0b11111) * 2
    minutes = (value >> 5) & 0b111111
    hour = (value >> 11) & 0b11111
    return strftime(TIME_FORMAT, datetime(1, 1, 1, hour, minutes, seconds).timetuple())
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
class BaseNumericDataType(Enum):
    """The fixed-width numeric types that a magic test can read.

    Each member's value is a tuple of ``(name, struct_fmt, num_bytes[, to_value])``; see ``__init__``.
    """
    BYTE = ("byte", "b", 1)
    BYTE1 = ("1", "b", 1)
    SHORT = ("short", "h", 2)
    SHORT2 = ("2", "h", 2)
    LONG = ("long", "l", 4)
    LONG4 = ("4", "l", 4)
    QUAD = ("quad", "q", 8)
    QUAD8 = ("8", "q", 8)
    FLOAT = ("float", "f", 4)
    DOUBLE = ("double", "d", 8)
    # the date types hold seconds; utc_date/local_date expect milliseconds, hence the * 1000
    DATE = ("date", "L", 4, lambda n: utc_date(n * 1000))
    QDATE = ("qdate", "Q", 8, lambda n: utc_date(n * 1000))
    LDATE = ("ldate", "L", 4, lambda n: local_date(n * 1000))
    QLDATE = ("qldate", "Q", 8, lambda n: local_date(n * 1000))
    # no converter is given here, so the raw integer is reported as the value
    QWDATE = ("qwdate", "Q", 8)
    MSDOSDATE = ("msdosdate", "h", 2, msdos_date)
    MSDOSTIME = ("msdostime", "h", 2, msdos_time)

    def __init__(
            self, name: str,
            struct_fmt: str,
            num_bytes: int,
            to_value: Callable[[int], Any] = lambda n: n
    ):
        """Record the member's properties and register it in ``BASE_NUMERIC_TYPES_BY_NAME``.

        :param name: the type's name as written in a magic definition; used as the registry key
        :param struct_fmt: the ``struct`` format character used to unpack the value
        :param num_bytes: the size of the value in bytes
        :param to_value: converts the unpacked number into the value reported for a match
        """
        self.struct_fmt: str = struct_fmt
        self.num_bytes: int = num_bytes
        self.to_value: Callable[[int], Any] = to_value
        BASE_NUMERIC_TYPES_BY_NAME[name] = self
|
|
1887
|
+
|
|
1888
|
+
|
|
1889
|
+
# Registry from an operator's symbol (e.g. "=", "<") to its enum member; filled in by NumericOperator.__init__.
NUMERIC_OPERATORS_BY_SYMBOL: Dict[str, "NumericOperator"] = {}


class NumericOperator(Enum):
    """The comparison operators that may prefix the expected value of a numeric test.

    Each member's value is ``(symbol, test)`` where ``test(a, b)`` is called with the value read from
    the file as ``a`` and the expected value as ``b``.
    """
    EQUALS = ("=", lambda a, b: a == b)
    LESS_THAN = ("<", lambda a, b: a < b)
    GREATER_THAN = (">", lambda a, b: a > b)
    ALL_BITS_SET = ("&", lambda a, b: (a & b) == b)  # value from the file (a) must have set all bits set in b
    ALL_BITS_CLEAR = ("^", lambda a, b: not (a & b))  # value from the file (a) must have clear all bits set in b
    NOT = ("!", lambda a, b: not (a == b))

    def __init__(self, symbol: str, test: Union[
        Callable[[int, int], bool],
        Callable[[float, float], bool],
        Callable[[CStyleInt, CStyleInt], bool]
    ]):
        """Store the symbol and predicate, and register the member in ``NUMERIC_OPERATORS_BY_SYMBOL``."""
        self.symbol: str = symbol
        self.test: Union[
            Callable[[int, int], bool], Callable[[float, float], bool], Callable[[CStyleInt, CStyleInt], bool]
        ] = test
        NUMERIC_OPERATORS_BY_SYMBOL[symbol] = self

    @staticmethod
    def get(symbol: str) -> "NumericOperator":
        """Look up an operator by its symbol.

        :raises KeyError: if ``symbol`` is not a registered operator symbol
        """
        return NUMERIC_OPERATORS_BY_SYMBOL[symbol]

    def __str__(self):
        return self.symbol
|
|
1917
|
+
|
|
1918
|
+
|
|
1919
|
+
class NumericValue(Generic[T]):
    """An expected numeric value paired with the operator used to compare it against file data."""

    def __init__(self, value: T, operator: NumericOperator = NumericOperator.EQUALS):
        self.value: T = value
        self.operator: NumericOperator = operator

    def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[T], T] = lambda x: x) -> bool:
        """Return whether ``to_match``, after ``preprocess``, satisfies the operator against this value."""
        candidate = preprocess(to_match)
        return self.operator.test(candidate, self.value)

    @staticmethod
    def parse(value: str, num_bytes: int) -> "NumericValue":
        """Parse a specification as an integer if possible, otherwise as a float.

        :raises ValueError: if the text is neither a valid integer nor a valid float specification
        """
        value = value.strip()
        for value_type in (IntegerValue, FloatValue):
            try:
                return value_type.parse(value, num_bytes)
            except ValueError:
                # not this kind of number; try the next one
                continue
        raise ValueError(f"Could not parse numeric type {value!r}")

    def __str__(self):
        return f"{self.operator}{self.value!s}"
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
class NumericWildcard(NumericValue):
    """A numeric value that accepts anything; it carries no expected value of its own."""

    def __init__(self):
        super().__init__(value=None)

    def test(self, to_match, unsigned, num_bytes, preprocess: Callable[[int], int] = lambda x: x) -> bool:
        """Always succeed, regardless of the value read from the file."""
        return True
|
|
1950
|
+
|
|
1951
|
+
|
|
1952
|
+
class IntegerValue(NumericValue[int]):
    """An expected integer value, compared using C-style fixed-width integer semantics."""

    def test(
            self,
            to_match: int,
            unsigned: bool,
            num_bytes: int,
            preprocess: Callable[[CStyleInt], CStyleInt] = lambda x: x
    ) -> bool:
        """Compare ``to_match`` with this value after converting both to ``num_bytes``-wide C integers.

        ``preprocess`` is applied to the converted file value only, not to the expected value.
        """
        to_test = make_c_style_int(value=self.value, num_bytes=num_bytes, signed=not unsigned)
        to_match = make_c_style_int(value=to_match, num_bytes=num_bytes, signed=not unsigned)
        return self.operator.test(preprocess(to_match), to_test)

    @staticmethod
    def parse(value: Union[str, bytes], num_bytes: int) -> "IntegerValue":
        """Parse ``[operator][~]number`` into an ``IntegerValue``.

        A leading ``~`` requests the bitwise complement of the number within ``num_bytes`` bytes.

        :raises ValueError: if the specification is empty, lacks an operand, or is not a valid integer
        """
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        if not value:
            # raise ValueError (not IndexError) so that NumericValue.parse can report the problem
            raise ValueError("An integer value specification cannot be empty")
        try:
            operator = NumericOperator.get(value[0])
            value = value[1:]
        except KeyError:
            operator = NumericOperator.EQUALS
        if not value:
            raise ValueError(f"Missing an integer operand after the {operator.symbol!r} operator")
        if value[0] == "~":
            int_value = parse_numeric(value[1:])
            int_value = (1 << (num_bytes * 8)) - 1 - int_value
        else:
            int_value = parse_numeric(value)
        return IntegerValue(value=int_value, operator=operator)
|
|
1979
|
+
|
|
1980
|
+
|
|
1981
|
+
class FloatValue(NumericValue[float]):
    """An expected floating point value."""

    @staticmethod
    def parse(value: str, num_bytes: int) -> "FloatValue":
        """Parse ``[operator]number`` into a ``FloatValue``.

        :raises ValueError: if the specification is empty, uses a bitwise operator (``&`` or ``^``),
                            or is not a valid float
        """
        if not value:
            # raise ValueError (not IndexError) so that NumericValue.parse can report the problem
            raise ValueError("A floating point value specification cannot be empty")
        try:
            operator = NumericOperator.get(value[0])
            value = value[1:]
        except KeyError:
            operator = NumericOperator.EQUALS
        if operator in (NumericOperator.ALL_BITS_SET, NumericOperator.ALL_BITS_CLEAR):
            raise ValueError(f"A floating point value cannot have the {operator.symbol} operator")
        return FloatValue(value=float(value), operator=operator)
|
|
1992
|
+
|
|
1993
|
+
|
|
1994
|
+
class NumericDataType(DataType[NumericValue]):
    """A numeric magic data type: a base type plus signedness, byte order and an optional preprocessing step."""

    def __init__(
            self,
            name: str,
            base_type: BaseNumericDataType,
            unsigned: bool = False,
            endianness: Endianness = Endianness.NATIVE,
            preprocess: Callable[[int], int] = lambda x: x
    ):
        super().__init__(name)
        self.base_type: BaseNumericDataType = base_type
        self.unsigned: bool = unsigned
        self.endianness: Endianness = endianness
        self.preprocess: Callable[[int], int] = preprocess
        if self.endianness == Endianness.PDP and self.base_type.num_bytes != 4:
            raise ValueError(f"PDP endianness can only be used with four byte base types, not {self.base_type}")

    def is_text(self, value: NumericValue) -> bool:
        """Numeric values are never treated as text."""
        return False

    def parse_expected(self, specification: str) -> NumericValue:
        """Parse the expected value of a test; a lone ``x`` is a wildcard that matches any number."""
        if specification.strip() != "x":
            return NumericValue.parse(specification, self.base_type.num_bytes)
        return NumericWildcard()

    def match(self, data: bytes, expected: NumericValue) -> DataTypeMatch:
        """Unpack a number from the start of ``data`` and test it against ``expected``.

        :return: a match over the bytes that were read, or ``DataTypeMatch.INVALID`` if there are too
                 few bytes, the bytes cannot be unpacked, or the test fails
        """
        width = self.base_type.num_bytes
        if len(data) < width:
            return DataTypeMatch.INVALID
        if self.endianness == Endianness.PDP:
            assert self.base_type.num_bytes == 4
            if self.unsigned:
                # two little-endian 16-bit words, most significant word first
                high_word = struct.unpack("<H", data[:2])[0]
                low_word = struct.unpack("<H", data[2:4])[0]
                value = (high_word << 16) | low_word
            else:
                # swap the bytes within each word, then read as an ordinary big-endian signed integer
                reordered = bytes([data[1], data[0], data[3], data[2]])
                value = struct.unpack(">i", reordered)[0]
        else:
            type_code = self.base_type.struct_fmt
            if self.unsigned and self.base_type not in (BaseNumericDataType.DOUBLE, BaseNumericDataType.FLOAT):
                # struct uses the upper-case code for the unsigned variant of an integer type
                type_code = type_code.upper()
            full_format = f"{self.endianness.value}{type_code}"
            try:
                value = struct.unpack(full_format, data[:width])[0]
            except struct.error:
                return DataTypeMatch.INVALID
        if not expected.test(value, self.unsigned, self.base_type.num_bytes, self.preprocess):
            return DataTypeMatch.INVALID
        processed = self.preprocess(value)
        return DataTypeMatch(data[:self.base_type.num_bytes], self.base_type.to_value(processed))

    @staticmethod
    def parse(fmt: str) -> "NumericDataType":
        """Parse a numeric type declaration such as ``ubelong&0xff``.

        The declaration is an optional ``u`` (unsigned), an optional byte order prefix (``le``, ``be``
        or ``me``), a base type name, and an optional arithmetic operator with its operand.

        :raises ValueError: if a float or double is declared unsigned, or the base type is unknown
        """
        name = fmt
        unsigned = fmt.startswith("u")
        if unsigned:
            fmt = fmt[1:]
            if fmt.startswith(("double", "float")):
                raise ValueError(f"{name[1:]} cannot be unsigned")
        for prefix, byte_order in (
                ("le", Endianness.LITTLE),
                ("be", Endianness.BIG),
                ("me", Endianness.PDP)
        ):
            if fmt.startswith(prefix):
                endianness = byte_order
                fmt = fmt[2:]
                break
        else:
            endianness = Endianness.NATIVE
        for symbol, operator in (
                ("&", lambda a, b: a & b),
                ("%", lambda a, b: a % b),
                ("+", lambda a, b: a + b),
                ("-", lambda a, b: a - b),
                ("^", lambda a, b: a ^ b),
                ("/", lambda a, b: [a // b, a / b][isinstance(a, float)]),
                ("*", lambda a, b: a * b),
                ("|", lambda a, b: a | b)
        ):
            split_at = fmt.find(symbol)
            if split_at <= 0:
                continue
            operand = parse_numeric(fmt[split_at + 1:])
            # the loop stops right here, so `operator` and `operand` keep these values for the closure
            preprocess = lambda n: operator(n, operand)
            fmt = fmt[:split_at]
            break
        else:
            preprocess = lambda n: n
        if fmt not in BASE_NUMERIC_TYPES_BY_NAME:
            raise ValueError(f"Invalid numeric data type: {name!r}")
        return NumericDataType(
            name=name,
            base_type=BASE_NUMERIC_TYPES_BY_NAME[fmt],
            unsigned=unsigned,
            endianness=endianness,
            preprocess=preprocess
        )
|
|
2094
|
+
|
|
2095
|
+
|
|
2096
|
+
class ConstantMatchTest(MagicTest, Generic[T]):
    """A test that reads a value of a given data type at an offset and compares it with a constant."""

    def __init__(
            self,
            offset: Offset,
            data_type: DataType[T],
            constant: T,
            mime: Optional[str] = None,
            extensions: Iterable[str] = (),
            message: str = "",
            parent: Optional["MagicTest"] = None
    ):
        super().__init__(offset=offset, mime=mime, extensions=extensions, message=message, parent=parent)
        self.data_type: DataType[T] = data_type
        self.constant: T = constant

    def subtest_type(self) -> TestType:
        """Classify the test as text or binary according to the data type's view of the constant."""
        return TestType.TEXT if self.data_type.is_text(self.constant) else TestType.BINARY

    def calculate_absolute_offset(self, data: bytes, parent_match: Optional[TestResult] = None) -> int:
        """Resolve the test's offset, letting the data type decide whether invalid offsets are allowed."""
        tolerate_invalid = self.data_type.allows_invalid_offsets(self.constant)
        return self.offset.to_absolute(data, parent_match, tolerate_invalid)

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Match the data type against the bytes starting at ``absolute_offset``."""
        match = self.data_type.match(data[absolute_offset:], self.constant)
        if not match:
            return FailedTest(
                self,
                offset=absolute_offset,
                parent=parent_match,
                message=f"expected {self.constant!s}"
            )
        return MatchedTest(
            self,
            offset=absolute_offset + match.initial_offset,
            length=len(match.raw_match),
            value=match.value,
            parent=parent_match
        )
|
|
2132
|
+
|
|
2133
|
+
|
|
2134
|
+
class OffsetMatchTest(MagicTest):
    """A test on the resolved offset itself rather than on the bytes found there."""

    def __init__(
            self,
            offset: Offset,
            value: IntegerValue,
            mime: Optional[str] = None,
            extensions: Iterable[str] = (),
            message: str = "",
            parent: Optional["MagicTest"] = None
    ):
        super().__init__(offset=offset, mime=mime, extensions=extensions, message=message, parent=parent)
        self.value: IntegerValue = value

    def subtest_type(self) -> TestType:
        return TestType.UNKNOWN

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Compare ``absolute_offset``, as an unsigned 8-byte integer, with the expected value."""
        if not self.value.test(absolute_offset, unsigned=True, num_bytes=8):
            return FailedTest(
                test=self,
                offset=absolute_offset,
                parent=parent_match,
                message=f"expected {self.value!r}"
            )
        # a successful match covers everything from the start of the data up to the offset
        return MatchedTest(self, offset=0, length=absolute_offset, value=absolute_offset, parent=parent_match)
|
|
2160
|
+
|
|
2161
|
+
|
|
2162
|
+
class IndirectResult(MatchedTest):
    """The zero-length, value-less result produced when an indirect test matches."""

    def __init__(self, test: "IndirectTest", offset: int, parent: Optional[TestResult] = None):
        super().__init__(test, value=None, offset=offset, length=0, parent=parent)

    def explain(self, writer: ANSIWriter, file: Streamable):
        """Write a one-line, dimmed description of this match to ``writer``."""
        description = f"Indirect test {self.test} matched at offset {self.offset}\n"
        writer.write(description, dim=True)
|
|
2168
|
+
|
|
2169
|
+
|
|
2170
|
+
class IndirectTest(MagicTest):
    """An ``indirect`` test; it always yields an ``IndirectResult`` at the resolved offset."""

    def __init__(
            self,
            matcher: "MagicMatcher",
            offset: Offset,
            relative: bool = False,
            mime: Optional[str] = None,
            extensions: Iterable[str] = (),
            message: str = "",
            parent: Optional[MagicTest] = None
    ):
        super().__init__(offset=offset, mime=mime, extensions=extensions, message=message, parent=parent)
        self.matcher: MagicMatcher = matcher
        self.relative: bool = relative
        self.can_match_mime = True
        self.can_be_indirect = True
        self._type = TestType.BINARY
        # propagate the same three flags to every ancestor of this test
        ancestor = parent
        while ancestor is not None:
            ancestor.can_be_indirect = True
            ancestor.can_match_mime = True
            ancestor._type = TestType.BINARY
            ancestor = ancestor.parent

    def subtest_type(self) -> TestType:
        return TestType.BINARY

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Return an ``IndirectResult``; a relative test adds the parent match's offset first."""
        if not self.relative:
            return IndirectResult(self, absolute_offset, parent_match)
        if parent_match is None:
            return FailedTest(
                test=self,
                offset=absolute_offset,
                parent=parent_match,
                message="the test is relative but it does not have a parent test (this is likely a bug in the magic"
                        " definition file)"
            )
        return IndirectResult(self, absolute_offset + parent_match.offset, parent_match)
|
|
2209
|
+
|
|
2210
|
+
|
|
2211
|
+
class NamedTest(MagicTest):
    """A named group of tests that is only ever run when referenced by a ``use`` test."""

    def __init__(
            self,
            name: str,
            offset: Offset,
            mime: Optional[str] = None,
            extensions: Iterable[str] = (),
            message: str = ""
    ):
        assert isinstance(offset, AbsoluteOffset) and offset.offset == 0

        class NamedTestOffset(Offset):
            """Resolves to the offset of the last match, i.e. of the calling ``use`` test's result."""

            def to_absolute(self, data: bytes, last_match: Optional[TestResult], allow_invalid: bool = False) -> int:
                assert last_match is not None
                return last_match.offset

        super().__init__(
            offset=NamedTestOffset(),
            mime=mime,
            extensions=extensions,
            # by default, named tests should not add a space if they don't contain an explicit message
            message=message or "\b",
            parent=None
        )
        self.name: str = name
        self.named_test = self
        self.used_by: Set[UseTest] = set()

    def subtest_type(self) -> TestType:
        return TestType.UNKNOWN

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> MatchedTest:
        """Produce a zero-length match positioned at the end of the parent match.

        :raises ValueError: if there is no parent match
        """
        if parent_match is None:
            raise ValueError("A named test must always be called from a `use` test.")
        end_of_parent = parent_match.offset + parent_match.length
        return MatchedTest(self, offset=end_of_parent, length=0, value=self.name, parent=parent_match)

    def __str__(self):
        return self.name
|
|
2247
|
+
|
|
2248
|
+
|
|
2249
|
+
class UseTest(MagicTest):
    """A ``use`` test: runs a referenced ``NamedTest`` at this test's offset, then this test's own children."""

    def __init__(
            self,
            referenced_test: NamedTest,
            offset: Offset,
            mime: Optional[str] = None,
            extensions: Iterable[str] = (),
            message: str = "",
            parent: Optional["MagicTest"] = None,
            flip_endianness: bool = False,
            late_binding: bool = False
    ):
        super().__init__(offset=offset, mime=mime, extensions=extensions, message=message, parent=parent)
        self.referenced_test: NamedTest = referenced_test
        self.flip_endianness: bool = flip_endianness
        self.late_binding: bool = late_binding
        # let the named test know who references it
        referenced_test.used_by.add(self)

    def subtest_type(self) -> TestType:
        """A use test has the same type as the named test it references."""
        return self.referenced_test.test_type

    def referenced_tests(self) -> Set[NamedTest]:
        """Return the named tests referenced from here, including those reached through the referenced test."""
        result = super().referenced_tests() | {self.referenced_test}
        # skip the recursion when this test belongs to the very named test it references
        if self.named_test is None or self.named_test.name != self.referenced_test.name:
            result |= self.referenced_test.referenced_tests()
        return result

    def _match(self, context: MatchContext, parent_match: Optional[TestResult] = None) -> Iterator[TestResult]:
        """Yield this test's own match, then the named test's results, then the children's results.

        Nothing at all is yielded if the offset is invalid or the named test produces no results.

        :raises NotImplementedError: if the test was declared with flipped endianness
        """
        if self.flip_endianness:
            raise NotImplementedError("TODO: Add support for use tests with flipped endianness")
        try:
            absolute_offset = self.offset.to_absolute(context.data, last_match=parent_match)
        except InvalidOffsetError:
            return None
        log.trace(
            f"{self.source_info!s}\tTrue\t{absolute_offset}\t{context.data[absolute_offset:absolute_offset + 20]!r}"
        )
        use_match = MatchedTest(self, None, absolute_offset, 0, parent=parent_match)
        yielded = False
        for named_result in self.referenced_test._match(context, use_match):
            if not yielded:
                # emit our own match lazily, immediately before the first result of the named test
                yielded = True
                yield use_match
            yield named_result
        if not yielded:
            # the named test did not match anything, so don't try any of our children
            return
        elif context.only_match_mime and not self.can_match_mime:
            # none of our children can produce a mime type
            return
        for child in self.children:
            if not context.only_match_mime or child.can_match_mime:
                yield from child._match(context=context, parent_match=use_match)

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        # all of the work happens in _match, which is overridden above
        raise NotImplementedError("This function should never be called")
|
|
2305
|
+
|
|
2306
|
+
|
|
2307
|
+
class JSONTest(MagicTest):
    """A test that matches if everything from the offset to the end of the data parses as JSON."""

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> Optional[TestResult]:
        """Parse the remainder of ``data`` as JSON.

        :return: a match carrying the parsed document and covering the rest of the data, or a failed
                 test carrying the parser's error message
        """
        try:
            parsed = json.loads(data[absolute_offset:])
        except (json.JSONDecodeError, UnicodeDecodeError, RecursionError) as e:
            # RecursionError: pathologically nested input must fail the test rather than abort matching
            return FailedTest(
                test=self,
                offset=absolute_offset,
                parent=parent_match,
                message=str(e)
            )
        return MatchedTest(self, offset=absolute_offset, length=len(data) - absolute_offset, value=parsed,
                           parent=parent_match)

    def subtest_type(self) -> TestType:
        return TestType.TEXT
|
|
2323
|
+
|
|
2324
|
+
|
|
2325
|
+
class CSVTest(MagicTest):
    """A test that matches if the data, decoded as UTF-8, is a consistent table in some registered CSV dialect."""

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Try each registered ``csv`` dialect in turn and match on the first one that fits.

        A dialect fits when the first row has at least two columns and every following row has the
        same number of columns as the first.
        """
        try:
            text = data[absolute_offset:].decode("utf-8")
        except UnicodeDecodeError as e:
            return FailedTest(test=self, offset=absolute_offset, parent=parent_match, message=str(e))
        for dialect in csv.list_dialects():
            rows = csv.reader(StringIO(text, newline=""), dialect=dialect)
            expected_columns = None
            consistent = False
            try:
                for row in rows:
                    if expected_columns is None:
                        expected_columns = len(row)
                        if expected_columns < 2:
                            # CSVs should have at least two columns
                            break
                        consistent = True
                    elif len(row) != expected_columns:
                        # every row of the CSV should have the same number of columns
                        consistent = False
                        break
            except csv.Error:
                # the data cannot be read with this dialect; move on to the next one
                continue
            if consistent:
                # every row was valid, and we had at least one row
                return MatchedTest(self, offset=absolute_offset, length=len(data) - absolute_offset, value=dialect,
                                   parent=parent_match)
        return FailedTest(
            test=self,
            offset=absolute_offset,
            parent=parent_match,
            message=f"the input did not match a known CSV dialect ({', '.join(csv.list_dialects())})"
        )

    def subtest_type(self) -> TestType:
        return TestType.TEXT
|
|
2362
|
+
|
|
2363
|
+
|
|
2364
|
+
class DefaultTest(MagicTest):
    """A ``default`` test: matches only while its parent has not yet had a child match."""

    def subtest_type(self) -> TestType:
        return TestType.UNKNOWN

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Succeed with a zero-length match unless the parent match already has a matching child."""
        if parent_match is not None and parent_match.child_matched:
            return FailedTest(
                self,
                offset=absolute_offset,
                parent=parent_match,
                message="the parent test already has a child that matched"
            )
        return MatchedTest(self, offset=absolute_offset, length=0, value=True, parent=parent_match)
|
|
2374
|
+
|
|
2375
|
+
|
|
2376
|
+
class ClearTest(MagicTest):
    """A ``clear`` test: always matches, and resets the parent match's ``child_matched`` flag."""

    def subtest_type(self) -> TestType:
        return TestType.UNKNOWN

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> MatchedTest:
        """Return a zero-length match, first clearing ``child_matched`` on the parent match if there is one."""
        if parent_match is not None:
            parent_match.child_matched = False
            return MatchedTest(self, offset=absolute_offset, length=0, parent=parent_match, value=None)
        return MatchedTest(self, offset=absolute_offset, length=0, value=None)
|
|
2386
|
+
|
|
2387
|
+
|
|
2388
|
+
class DERTest(MagicTest):
    """Placeholder for the ``der`` test, which is not implemented."""

    def subtest_type(self) -> TestType:
        return TestType.BINARY

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Always raises, because DER matching has not been implemented.

        :raises NotImplementedError: unconditionally
        """
        todo = "TODO: Implement support for the DER test (e.g., using the Kaitai asn1_der.py parser)"
        raise NotImplementedError(todo)
|
|
2396
|
+
|
|
2397
|
+
|
|
2398
|
+
class PlainTextTest(MagicTest):
    """A test that matches when a character encoding can be detected for the data with enough confidence."""
    AUTO_REGISTER_TEST = False

    def __init__(
            self,
            offset: Offset = AbsoluteOffset(0),
            mime: Union[str, TernaryExecutableMessage] = "text/plain",
            extensions: Iterable[str] = ("txt",),
            parent: Optional["MagicTest"] = None,
            comments: Iterable[Comment] = (),
            minimum_encoding_confidence: float = 0.5
    ):
        super().__init__(offset, mime, extensions, "", parent, comments)
        self.minimum_encoding_confidence: float = minimum_encoding_confidence

    def subtest_type(self) -> TestType:
        return TestType.TEXT

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Detect the encoding of the data from ``absolute_offset`` onward.

        On success the test's message is replaced with ``"<encoding> text"``, which is why an
        instance can only be used once.  The match value is the decoded text, or the raw bytes if
        they cannot be decoded with the detected encoding.

        :raises ValueError: if this instance has already produced a match
        """
        if not isinstance(self.message, ConstantMessage) or self.message.message:
            raise ValueError("A new PlainTextTest must be constructed for each call to .test")
        detector = UniversalDetector()
        offset = absolute_offset
        scan_end = min(len(data), 5000000)
        while not detector.done and offset < scan_end:
            # feed 1kB at a time until we have high confidence in the classification
            # up to a maximum of 5MB
            detector.feed(data[offset:offset+1024])
            offset += 1024
        detector.close()
        encoding = detector.result["encoding"]
        # the detector reports no encoding at all when it could not classify the data
        if encoding is not None and detector.result["confidence"] >= self.minimum_encoding_confidence:
            try:
                value = data[absolute_offset:].decode(encoding)
            except (UnicodeDecodeError, LookupError):
                # LookupError: the detector can name encodings for which Python has no codec
                value = data[absolute_offset:]
            self.message = ConstantMessage(f"{encoding} text")
            return MatchedTest(self, offset=absolute_offset, length=len(data) - absolute_offset, parent=parent_match,
                               value=value)
        else:
            return FailedTest(self, offset=absolute_offset, parent=parent_match, message="the data do not appear to "
                                                                                         "be encoded in a text format")
|
|
2439
|
+
|
|
2440
|
+
|
|
2441
|
+
class OctetStreamTest(MagicTest):
    """Catch-all test that matches any input as ``application/octet-stream``.

    ``MagicMatcher.match`` yields a match built from this test when nothing
    else matched and the input is not text.
    """

    # NOTE(review): presumably keeps this class out of automatic test
    # registration -- confirm against the MagicTest base class.
    AUTO_REGISTER_TEST = False

    def __init__(
            self,
            offset: Offset = AbsoluteOffset(0),
            mime: Union[str, TernaryExecutableMessage] = "application/octet-stream",
            extensions: Iterable[str] = (),
            message: Union[str, Message] = "data",
            parent: Optional["MagicTest"] = None,
            comments: Iterable[Comment] = ()
    ):
        """Forward all arguments unchanged to ``MagicTest.__init__``."""
        super().__init__(offset, mime, extensions, message, parent, comments)

    def subtest_type(self) -> TestType:
        """Report the category of this test, which is always ``TestType.BINARY``."""
        return TestType.BINARY

    def test(self, data: bytes, absolute_offset: int, parent_match: Optional[TestResult]) -> TestResult:
        """Unconditionally match everything from ``absolute_offset`` to the end of ``data``.

        The reported value is the matched slice, so it agrees with the reported
        ``offset`` and ``length`` (as ``PlainTextTest.test`` already does).
        """
        # Everything is an octet stream!
        return MatchedTest(self, offset=absolute_offset, length=len(data) - absolute_offset, parent=parent_match,
                           value=data[absolute_offset:])
|
|
2462
|
+
|
|
2463
|
+
|
|
2464
|
+
# One test line of a magic definition file: optional ">" level markers, an
# offset that may not start with "!", a data type, and the rest of the line.
TEST_PATTERN: Pattern[str] = re.compile(
    r"^(?P<level>[>]*)"
    r"(?P<offset>[^\s!][^\s]*)"
    r"\s+(?P<data_type>[^\s]+)"
    r"\s+(?P<remainder>.+)$"
)
# "!:mime <type>" directive, with an optional trailing "#" comment.
MIME_PATTERN: Pattern[str] = re.compile(
    r"^!:mime\s+([^#]+?)\s*(#.*)?$"
)
# "!:ext <extensions>" directive, with an optional trailing "#" comment.
EXTENSION_PATTERN: Pattern[str] = re.compile(
    r"^!:ext\s+([^\s]+)\s*(#.*)?$"
)
|
|
2469
|
+
|
|
2470
|
+
|
|
2471
|
+
def _split_with_escapes(text: str) -> Tuple[str, str]:
|
|
2472
|
+
first_length = 0
|
|
2473
|
+
escaped = False
|
|
2474
|
+
delimiter_length = 1
|
|
2475
|
+
for c in text:
|
|
2476
|
+
if escaped:
|
|
2477
|
+
escaped = False
|
|
2478
|
+
elif c == "\\":
|
|
2479
|
+
escaped = True
|
|
2480
|
+
elif c == "\n":
|
|
2481
|
+
if first_length > 0 and text[first_length - 1] == "\r":
|
|
2482
|
+
# strip the \r from trailing \r\n
|
|
2483
|
+
first_length -= 1
|
|
2484
|
+
delimiter_length = 2
|
|
2485
|
+
break
|
|
2486
|
+
elif c == " " or c == "\t":
|
|
2487
|
+
break
|
|
2488
|
+
first_length += 1
|
|
2489
|
+
return text[:first_length], text[first_length + delimiter_length:]
|
|
2490
|
+
|
|
2491
|
+
|
|
2492
|
+
class Match:
    """Lazily evaluated sequence of the ``TestResult`` objects of one match.

    Results are pulled from the supplied iterable only as far as needed and
    cached, so a ``Match`` can be indexed and iterated repeatedly.
    """

    def __init__(
            self, matcher: "MagicMatcher", context: MatchContext, results: Iterable[TestResult]
    ):
        """
        :param matcher: matcher used to follow ``IndirectResult`` entries
        :param context: the context (data and options) that was matched
        :param results: the test results; consumed lazily
        """
        self.matcher: MagicMatcher = matcher
        self.context: MatchContext = context
        # Set to None once the underlying iterable has been exhausted.
        self._result_iter: Optional[Iterator[TestResult]] = iter(results)
        # Results collected so far, in order.
        self._results: List[TestResult] = []

    @property
    def data(self) -> bytes:
        """The data of the context that was matched."""
        return self.context.data

    @property
    def only_match_mime(self) -> bool:
        """The ``only_match_mime`` flag of the context that was matched."""
        return self.context.only_match_mime

    @property
    def mimetypes(self) -> LazyIterableSet[str]:
        """Resolved MIME types of every result whose test declares one."""
        return LazyIterableSet((
            result.test.mime.resolve(self.context) for result in self if result.test.mime is not None)
        )

    @property
    def extensions(self) -> LazyIterableSet[str]:
        """File extensions declared by the tests of all results."""
        def _extensions():
            for result in self:
                yield from result.test.extensions
        return LazyIterableSet(_extensions())

    def explain(self, file: Streamable, ansi_color: Optional[bool] = None) -> str:
        """Render the explanation of every result into a single string.

        :param file: passed through to each result's ``explain``
        :param ansi_color: whether to emit ANSI colors; when ``None`` this is
            decided by whether ``sys.stdout`` is a TTY
        """
        if ansi_color is None:
            ansi_color = sys.stdout.isatty()
        writer = ANSIWriter(use_ansi=ansi_color)
        for result in self:
            result.explain(writer, file=file)
        return str(writer)

    def __bool__(self):
        """A match is truthy if it has a MIME type, an extension, or a non-empty message."""
        return any(m for m in self.mimetypes) or any(e for e in self.extensions) or bool(self.message())

    def __len__(self):
        """Number of results; forces all remaining results to be collected."""
        if self._result_iter is not None:
            # we have not yet finished collecting the results
            for _ in self:
                pass
            assert self._result_iter is None
        return len(self._results)

    def __getitem__(self, index: int) -> TestResult:
        """Return the result at ``index``, collecting only as many results as needed.

        A negative index is relative to the end, so it forces all results to
        be collected first.

        :raises IndexError: if there is no result at ``index``
        """
        # Keep pulling while the requested position is not available yet.
        while self._result_iter is not None and (index < 0 or index >= len(self._results)):
            # we have not yet finished collecting the results
            try:
                result = next(self._result_iter)
            except StopIteration:
                self._result_iter = None
                break
            self._results.append(result)
            if isinstance(result, IndirectResult):
                # splice in everything matched at the indirect offset
                for match in self.matcher.match(self.context[result.offset:]):
                    self._results.extend(match)
        return self._results[index]

    def __iter__(self) -> Iterator[TestResult]:
        """Iterate over the results, collecting them on demand."""
        if self._result_iter is None:
            yield from self._results
            return
        i = 0
        while True:
            try:
                yield self[i]
            except IndexError:
                break
            i += 1

    def message(self) -> str:
        """Build the human-readable description by joining the messages of all results.

        Empty messages are skipped.  Messages are separated by a single space
        unless the accumulated text already ends in whitespace or the new
        message starts with a backspace (``\\b``), which is removed and
        suppresses the separator.  printf-style placeholders are filled in
        with the result's value; formatting failures are logged and the
        message is kept unformatted.
        """
        msg = ""
        for result in self:
            m = result.test.message.resolve(self.context).lstrip()
            if not m:
                continue
            elif m.startswith("\b"):
                # a leading backspace means "append without a separating space"
                result_str = m[1:]
            else:
                result_str = m
                if msg and not msg[-1] in " \t\r\n\v\f":
                    msg = f"{msg} "
            try:
                is_negative = result.value < 0
            except TypeError:
                # non-numeric values (e.g. bytes or str) cannot be negative
                is_negative = False
            if "%u" in result_str and is_negative:
                # sometimes we parsed a negative value and want to print it as an unsigned int:
                result_str = result_str % (result.value + 2**(8 * result.length),)
            elif "%" in result_str.replace("%%", ""):
                # Python has no "ll" length modifier; "%#ll" becomes an explicit 0x prefix
                result_str = result_str.replace("%ll", "%")
                result_str = result_str.replace("%#ll", "0x%")
                try:
                    result_str = result_str % (result.value,)
                except (ValueError, TypeError) as e:
                    # TypeError: the value's type does not fit the placeholder
                    log.error(f"Error formatting message {result_str!r} with value {result.value!r}: {e!s}")
            result_str = result_str.replace("%%", "%")
            msg = f"{msg}{result_str}"
        return msg

    __str__ = message
|
|
2593
|
+
|
|
2594
|
+
|
|
2595
|
+
class DefaultMagicMatcher:
    """Descriptor that builds the shared default ``MagicMatcher`` on first access.

    The instance is stored on this class, so it is shared by every owner of
    the descriptor.  Assigning replaces it; deleting clears it so that the
    next access builds it again.
    """

    _DEFAULT_INSTANCE: Optional["MagicMatcher"] = None

    def __get__(self, instance, owner) -> "MagicMatcher":
        """Return the shared matcher, parsing the bundled definitions if needed."""
        if DefaultMagicMatcher._DEFAULT_INSTANCE is not None:
            return DefaultMagicMatcher._DEFAULT_INSTANCE
        # FIXME: skip the DER definition for now because we don't yet support it
        supported_defs = [d for d in MAGIC_DEFS if d.name != "der"]
        DefaultMagicMatcher._DEFAULT_INSTANCE = MagicMatcher.parse(*supported_defs)
        return DefaultMagicMatcher._DEFAULT_INSTANCE

    def __set__(self, instance, value: Optional["MagicMatcher"]):
        """Replace the shared matcher with ``value``."""
        DefaultMagicMatcher._DEFAULT_INSTANCE = value

    def __delete__(self, instance):
        """Discard the shared matcher."""
        DefaultMagicMatcher._DEFAULT_INSTANCE = None
|
|
2610
|
+
|
|
2611
|
+
|
|
2612
|
+
class MagicMatcher:
|
|
2613
|
+
DEFAULT_INSTANCE: "MagicMatcher" = DefaultMagicMatcher() # type: ignore
|
|
2614
|
+
|
|
2615
|
+
def __init__(self, tests: Iterable[MagicTest] = ()):
    """Create a matcher and register each of ``tests`` through ``add``."""
    # Registered tests: named tests by name, all others in insertion order.
    self.named_tests: Dict[str, NamedTest] = {}
    self._tests: List[MagicTest] = []
    # Derived indexes, rebuilt by _reassign_test_types() while _dirty is set.
    self._text_tests: Set[MagicTest] = set()
    self._non_text_tests: Set[MagicTest] = set()
    self._tests_that_can_be_indirect: Set[MagicTest] = set()
    self._tests_by_ext: Dict[str, Set[MagicTest]] = defaultdict(set)
    self._tests_by_mime: Dict[str, Set[MagicTest]] = defaultdict(set)
    self._dirty: bool = True
    for new_test in tests:
        self.add(new_test)
|
|
2626
|
+
|
|
2627
|
+
@property
def tests_by_mime(self) -> Dict[str, Set[MagicTest]]:
    """Tests indexed by MIME type; the index is refreshed first if it is stale."""
    self._reassign_test_types()
    return self._tests_by_mime
|
|
2631
|
+
|
|
2632
|
+
@property
def tests_by_ext(self) -> Dict[str, Set[MagicTest]]:
    """Tests indexed by file extension; the index is refreshed first if it is stale."""
    self._reassign_test_types()
    return self._tests_by_ext
|
|
2636
|
+
|
|
2637
|
+
@property
def tests_that_can_be_indirect(self) -> Set[MagicTest]:
    """Tests whose ``can_be_indirect`` flag is set; refreshed first if stale."""
    self._reassign_test_types()
    return self._tests_that_can_be_indirect
|
|
2641
|
+
|
|
2642
|
+
@property
def non_text_tests(self) -> Set[MagicTest]:
    """Tests whose type is anything other than ``TestType.TEXT``; refreshed first if stale."""
    self._reassign_test_types()
    return self._non_text_tests
|
|
2646
|
+
|
|
2647
|
+
@property
def text_tests(self) -> Set[MagicTest]:
    """Tests whose type is ``TestType.TEXT``; refreshed first if stale."""
    self._reassign_test_types()
    return self._text_tests
|
|
2651
|
+
|
|
2652
|
+
def add(self, test: Union[MagicTest, Path], test_type: TestType = TestType.UNKNOWN) -> List[MagicTest]:
    """Register a test, or every test defined in a magic definition file.

    :param test: a test object, or the path of a definition file to parse
    :param test_type: when not ``TestType.UNKNOWN``, overrides the type of
        each added test
    :return: the tests that were added; for a file, its level-zero tests
    :raises ValueError: if a ``NamedTest`` with the same name is already registered
    """
    if not isinstance(test, MagicTest):
        level_zero_tests, late_bindings, tests_with_mime, indirect_tests = self._parse_file(test, self)
        # Attach "use" tests that referenced a name before its definition to
        # the real named test, exactly as parse() does.  Names that are still
        # unknown are left alone; they may be defined by a file added later.
        for use_test in late_bindings:
            resolved = self.named_tests.get(use_test.referenced_test.name)
            if resolved is not None:
                use_test.referenced_test = resolved
                resolved.used_by.add(use_test)
        # Propagate the capability flags from each test up to its ancestors.
        for mime_test in tests_with_mime:
            assert mime_test.can_match_mime
            for ancestor in mime_test.ancestors():
                ancestor.can_match_mime = True
        for indirect_test in indirect_tests:
            assert indirect_test.can_be_indirect
            assert indirect_test.can_match_mime
            for ancestor in indirect_test.ancestors():
                ancestor.can_be_indirect = True
        for level_zero_test in level_zero_tests:
            self.add(level_zero_test, test_type=test_type)
        return list(level_zero_tests)

    if test_type != TestType.UNKNOWN:
        test.test_type = test_type

    # the derived indexes must be rebuilt before their next use
    self._dirty = True

    if isinstance(test, NamedTest):
        if test.name in self.named_tests:
            raise ValueError(f"A test named {test.name} already exists in this matcher!")
        self.named_tests[test.name] = test
    else:
        self._tests.append(test)

    return [test]
|
|
2681
|
+
|
|
2682
|
+
def _reassign_test_types(self):
    """Rebuild the derived test indexes if tests were added since the last rebuild."""
    if not self._dirty:
        return
    self._dirty = False
    # Start every index from scratch.
    self._text_tests = set()
    self._non_text_tests = set()
    self._tests_that_can_be_indirect = set()
    self._tests_by_ext = defaultdict(set)
    self._tests_by_mime = defaultdict(set)
    for registered in self._tests:
        is_text = registered.test_type == TestType.TEXT
        bucket = self._text_tests if is_text else self._non_text_tests
        bucket.add(registered)
        if registered.can_be_indirect:
            self._tests_that_can_be_indirect.add(registered)
        for mime in registered.mimetypes():
            self._tests_by_mime[mime].add(registered)
        for ext in registered.all_extensions():
            self._tests_by_ext[ext].add(registered)
|
|
2702
|
+
|
|
2703
|
+
def only_match(
        self,
        mimetypes: Optional[Iterable[str]] = None,
        extensions: Optional[Iterable[str]] = None
) -> "MagicMatcher":
    """
    Returns the simplest possible matcher that is capable of matching against all the given mimetypes or extensions.

    If both arguments are None, this matcher itself is returned.  Otherwise the new matcher contains the tests
    indexed under any of the given MIME types or extensions, the indirect-capable tests that declare no MIME
    type of their own, and every named test those tests reference.  An argument that is None selects no tests
    by that criterion.

    MIME types and extensions unknown to this matcher are ignored and are not added to its indexes.

    """
    if mimetypes is None and extensions is None:
        return self
    tests: Set[MagicTest] = {
        indirect_test for indirect_test in self.tests_that_can_be_indirect
        if not any(True for _ in indirect_test.mimetypes())
    }
    # The indexes are defaultdicts: use .get() so that a lookup of an unknown
    # key does not insert an empty entry (which would then be reported by the
    # `mimetypes` / `extensions` properties of this matcher).
    if mimetypes is not None:
        for mime in mimetypes:
            tests.update(self.tests_by_mime.get(mime, ()))
    if extensions is not None:
        for ext in extensions:
            tests.update(self.tests_by_ext.get(ext, ()))
    # add in all necessary named tests:
    required_named_tests = set()
    for test in tests:
        required_named_tests |= test.referenced_tests()
    return MagicMatcher(tests | required_named_tests)
|
|
2732
|
+
|
|
2733
|
+
def __iter__(self) -> Iterator[MagicTest]:
    """Iterate over the registered non-named tests in insertion order."""
    return iter(self._tests)
|
|
2735
|
+
|
|
2736
|
+
@property
def mimetypes(self) -> Iterable[str]:
    """The MIME types this matcher can match: the keys of the MIME index."""
    return self.tests_by_mime.keys()
|
|
2740
|
+
|
|
2741
|
+
@property
def extensions(self) -> Iterable[str]:
    """The extensions this matcher can match: the keys of the extension index."""
    return self.tests_by_ext.keys()
|
|
2745
|
+
|
|
2746
|
+
def match(self, to_match: Union[bytes, BinaryIO, str, Path, MatchContext]) -> Iterator[Match]:
    """Run the registered tests against ``to_match`` and yield each successful match.

    Non-text tests run first.  The text tests run only if a fresh
    ``PlainTextTest`` recognises the input as text.  If nothing matched at
    all, a single fallback is yielded: the plain-text match for text input,
    otherwise an ``OctetStreamTest`` match.

    When the context's ``only_match_mime`` flag is set, a match is accepted
    only if it has at least one MIME type that is not ``None``.

    :param to_match: raw bytes, an existing ``MatchContext``, or anything
        accepted by ``MatchContext.load``
    """
    if isinstance(to_match, bytes):
        to_match = MatchContext(to_match)
    elif not isinstance(to_match, MatchContext):
        to_match = MatchContext.load(to_match)

    yielded = False

    for binary_test in log.range(self.non_text_tests, desc="binary matching", unit=" tests", delay=1.0):
        candidate = Match(matcher=self, context=to_match, results=binary_test.match(to_match))
        if candidate and (not to_match.only_match_mime or any(t is not None for t in candidate.mimetypes)):
            yield candidate
            yielded = True

    # is this a plain text file?
    text_matcher = Match(matcher=self, context=to_match, results=PlainTextTest().match(to_match))
    is_text = text_matcher and (not to_match.only_match_mime or any(t is not None for t in text_matcher.mimetypes))

    if is_text:
        # this is a text file, so try all of the textual tests:
        for text_test in log.range(self.text_tests, desc="text matching", unit=" tests", delay=1.0):
            candidate = Match(matcher=self, context=to_match, results=text_test.match(to_match))
            if candidate and (not to_match.only_match_mime or any(t is not None for t in candidate.mimetypes)):
                yield candidate
                yielded = True

    if yielded:
        return
    # nothing matched, so fall back to the generic classification
    if is_text:
        yield text_matcher
    else:
        yield Match(matcher=self, context=to_match, results=OctetStreamTest().match(to_match))
|
|
2772
|
+
|
|
2773
|
+
@staticmethod
def parse_test(
        line: str,
        def_file: Path,
        line_number: int,
        parent: Optional[MagicTest] = None,
        matcher: Optional["MagicMatcher"] = None
) -> Optional[MagicTest]:
    """Parse one line of a magic definition file into a test object.

    :param line: the decoded line
    :param def_file: file the line came from (used for errors and source info)
    :param line_number: line number within ``def_file``
    :param parent: the most recently parsed test; the actual parent is the
        nearest test in its ancestry with a level below this line's level
    :param matcher: matcher in which named tests are looked up and
        registered.  When omitted, named tests are not registered and every
        ``use`` test is created as a late binding.
    :return: the new test, or ``None`` if the line is not a test line
    :raises ValueError: for an invalid level, offset, or expected value, a
        named test that is nested or duplicated
    :raises NotImplementedError: for ``default`` or ``clear`` tests at level 0
    """
    m = TEST_PATTERN.match(line)
    if not m:
        return None
    level = len(m.group("level"))
    # Walk up until we reach the test this line is nested under.
    while parent is not None and parent.level >= level:
        parent = parent.parent
    if parent is None and level != 0:
        raise ValueError(f"{def_file!s} line {line_number}: Invalid level for test {line!r}")
    test_str, message = _split_with_escapes(m.group("remainder"))
    message = unescape(message).decode("utf-8")
    try:
        offset = Offset.parse(m.group("offset"))
    except ValueError as e:
        raise ValueError(f"{def_file!s} line {line_number}: {e!s}") from e
    data_type = m.group("data_type")
    if data_type == "name":
        if parent is not None:
            raise ValueError(f"{def_file!s} line {line_number}: A named test must be at level 0")
        elif matcher is not None and test_str in matcher.named_tests:
            raise ValueError(f"{def_file!s} line {line_number}: Duplicate test named {test_str!r}")
        test = NamedTest(name=test_str, offset=offset, message=message)
        if matcher is not None:
            matcher.named_tests[test_str] = test
    elif data_type == "default":
        if parent is None:
            raise NotImplementedError("TODO: Add support for default tests at level 0")
        test = DefaultTest(offset=offset, message=message, parent=parent)
    elif data_type == "clear":
        if parent is None:
            raise NotImplementedError("TODO: Add support for clear tests at level 0")
        test = ClearTest(offset=offset, message=message, parent=parent)
    elif data_type == "offset":
        expected_value = IntegerValue.parse(test_str, num_bytes=8)
        test = OffsetMatchTest(offset=offset, value=expected_value, message=message,
                               parent=parent)
    elif data_type == "json":
        test = JSONTest(offset=offset, message=message, parent=parent)
    elif data_type == "csv":
        test = CSVTest(offset=offset, message=message, parent=parent)
    elif data_type == "indirect" or data_type == "indirect/r":
        test = IndirectTest(matcher=matcher, offset=offset,
                            relative=data_type.endswith("r"),
                            message=message, parent=parent)
    elif data_type == "use":
        # A leading "^" (optionally escaped) requests flipped endianness.
        if test_str.startswith("^"):
            flip_endianness = True
            test_str = test_str[1:]
        elif test_str.startswith("\\^"):
            flip_endianness = True
            test_str = test_str[2:]
        else:
            flip_endianness = False
        if matcher is None or test_str not in matcher.named_tests:
            # The named test is not known (yet): bind to a placeholder that
            # only carries the name.  MagicMatcher.parse() replaces it with
            # the real named test once all files have been read.
            late_binding = True

            class LateBindingNamedTest(NamedTest):
                def __init__(self):
                    super().__init__(test_str, offset=AbsoluteOffset(0))

            named_test: NamedTest = LateBindingNamedTest()
        else:
            late_binding = False
            named_test = matcher.named_tests[test_str]
        test = UseTest(  # type: ignore
            named_test,
            offset=offset,
            message=message,
            parent=parent,
            flip_endianness=flip_endianness,
            late_binding=late_binding
        )
    elif data_type == "der":
        # TODO: Update this as necessary once we fully implement the DERTest
        test = DERTest(offset=offset, message=message, parent=parent)
    else:
        try:
            parsed_type = DataType.parse(data_type)
            if test_str in ("<", ">", "=", "!", "&", "^", "~"):
                # Some files will erroneously add whitespace between the operator and the
                # subsequent value, so the operand is the first word of the message:
                actual_operand, message = _split_with_escapes(message)
                test_str = f"{test_str}{actual_operand}"
            constant = parsed_type.parse_expected(test_str)
        except ValueError as e:
            raise ValueError(f"{def_file!s} line {line_number}: {e!s}") from e
        test = ConstantMatchTest(
            offset=offset,
            data_type=parsed_type,
            constant=constant,
            message=message,
            parent=parent
        )
    test.source_info = SourceInfo(def_file, line_number, line)
    return test
|
|
2880
|
+
|
|
2881
|
+
@staticmethod
def _parse_file(
        def_file: Union[str, Path], matcher: "MagicMatcher"
) -> Tuple[Iterable[MagicTest], Iterable[UseTest], Set[MagicTest], Set[IndirectTest]]:
    """Parse every line of a magic definition file.

    Named tests are registered in ``matcher.named_tests``; nothing else is
    added to the matcher.  Comment lines are attached to the next test and
    dropped at a blank line.  ``!:apple`` and ``!:strength`` directives and
    lines that are not valid UTF-8 are skipped.

    :return: ``(level-zero tests, late-bound "use" tests, tests given a MIME
        type, indirect tests)``
    :raises ValueError: for a directive without a preceding test, a second
        MIME type for the same test, or an unrecognised line
    """
    level_zero_tests: List[MagicTest] = []
    late_bindings: List[UseTest] = []
    tests_with_mime: Set[MagicTest] = set()
    indirect_tests: Set[IndirectTest] = set()
    current_test: Optional[MagicTest] = None
    comments: List[Comment] = []
    with open(def_file, "rb") as f:
        for line_number, raw_line in enumerate(f, start=1):
            raw_line = raw_line.lstrip()
            if not raw_line:
                # skip empty lines
                comments = []
                continue
            if raw_line.startswith(b"#"):
                # this is a comment
                try:
                    comments.append(Comment(
                        message=raw_line[1:].strip().decode("utf-8"),
                        source_info=SourceInfo(def_file, line_number, raw_line.decode("utf-8"))
                    ))
                except UnicodeDecodeError:
                    pass
                continue
            if raw_line.startswith((b"!:apple", b"!:strength")):
                # ignore these directives for now
                continue
            try:
                line = raw_line.decode("utf-8")
            except UnicodeDecodeError:
                continue

            test = MagicMatcher.parse_test(line, def_file, line_number, current_test, matcher)
            if test is not None:
                if isinstance(test, NamedTest):
                    matcher.named_tests[test.name] = test
                else:
                    if isinstance(test, IndirectTest):
                        indirect_tests.add(test)
                    elif isinstance(test, UseTest) and test.late_binding:
                        late_bindings.append(test)
                    if test.level == 0:
                        level_zero_tests.append(test)
                test.source_info = SourceInfo(def_file, line_number, line)
                # the comments collected so far belong to this test
                test.comments = tuple(comments)
                comments = []
                current_test = test
                continue

            mime_match = MIME_PATTERN.match(line)
            if mime_match:
                if current_test is None:
                    raise ValueError(f"{def_file!s} line {line_number}: Unexpected mime type {line!r}")
                if current_test.mime is not None:
                    raise ValueError(f"{def_file!s} line {line_number}: Duplicate mime types for test "
                                     f"{current_test!r}: {current_test.mime!r} and {mime_match.group(1)}")
                current_test.mime = mime_match.group(1)
                tests_with_mime.add(current_test)
                continue

            ext_match = EXTENSION_PATTERN.match(line)
            if ext_match:
                if current_test is None:
                    raise ValueError(f"{def_file!s} line {line_number}: Unexpected ext: {line!r}")
                # several extensions may be listed, separated by "/" or ","
                current_test.extensions |= {ext for ext in re.split(r"[/,]", ext_match.group(1)) if ext}
                continue

            raise ValueError(f"{def_file!s} line {line_number}: Unexpected line\n{raw_line!r}")
    return level_zero_tests, late_bindings, tests_with_mime, indirect_tests
|
|
2950
|
+
|
|
2951
|
+
@staticmethod
def parse(*def_files: Union[str, Path]) -> "MagicMatcher":
    """Build a matcher from one or more magic definition files.

    All files are parsed first, so a ``use`` test may reference a named test
    that is defined later, even in another file.

    :raises ValueError: if a ``use`` test references a name that none of the
        files defines
    """
    matcher = MagicMatcher([])
    late_bindings: Dict[str, List[UseTest]] = {}
    zero_level_tests: List[MagicTest] = []
    tests_with_mime: Set[MagicTest] = set()
    indirect_tests: Set[IndirectTest] = set()
    for file in def_files:
        level_zero, late_bound, with_mime, indirect = MagicMatcher._parse_file(file, matcher=matcher)
        late_bindings[file] = list(late_bound)
        zero_level_tests.extend(level_zero)
        tests_with_mime.update(with_mime)
        indirect_tests.update(indirect)

    # resolve any "use" tests with late binding:
    for def_file, use_tests in late_bindings.items():
        for use_test in use_tests:
            wanted = use_test.referenced_test.name
            if wanted not in matcher.named_tests:
                raise ValueError(f"{def_file!s}: Named test {wanted!r} is not defined")
            named_test = matcher.named_tests[wanted]
            use_test.referenced_test = named_test
            named_test.used_by.add(use_test)

    # propagate the capability flags from each test up to its ancestors
    for mime_test in tests_with_mime:
        assert mime_test.can_match_mime
        for ancestor in mime_test.ancestors():
            ancestor.can_match_mime = True
    for indirect_test in indirect_tests:
        assert indirect_test.can_be_indirect
        assert indirect_test.can_match_mime
        for ancestor in indirect_test.ancestors():
            ancestor.can_be_indirect = True

    for top_level_test in zero_level_tests:
        matcher.add(top_level_test)
    return matcher
|