PyPI - filejack - Versions diffs - 1.0.0__py3-none-any.whl - Mend

filejack 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

filejack/AsyncDecodeManager.py +231 -0
filejack/RSC.py +50 -0
filejack/__init__.py +6 -0
filejack/__main__.py +4 -0
filejack/cli.py +96 -0
filejack/conversions.py +23 -0
filejack/decode_data.py +70 -0
filejack/decode_frames.py +244 -0
filejack/encode_frames.py +81 -0
filejack/examples/__init__.py +0 -0
filejack/examples/decoder.py +31 -0
filejack/examples/encoder.py +18 -0
filejack/examples/merge.py +24 -0
filejack/examples/stereo_to_mono.py +22 -0
filejack/merge_frames.py +59 -0
filejack/reconstruct_data.py +12 -0
filejack/values.py +32 -0
filejack-1.0.0.dist-info/METADATA +150 -0
filejack-1.0.0.dist-info/RECORD +23 -0
filejack-1.0.0.dist-info/WHEEL +5 -0
filejack-1.0.0.dist-info/entry_points.txt +2 -0
filejack-1.0.0.dist-info/licenses/LICENSE +21 -0
filejack-1.0.0.dist-info/top_level.txt +1 -0

filejack/AsyncDecodeManager.py ADDED Viewed

@@ -0,0 +1,231 @@
+import queue
+import threading
+import time
+import zlib
+from collections import deque
+from concurrent.futures.thread import ThreadPoolExecutor
+from dataclasses import dataclass
+from fractions import Fraction
+from statistics import median
+from reedsolo import ReedSolomonError
+from scipy.signal import hilbert, butter, sosfiltfilt, resample_poly
+from scipy.io import wavfile
+from .conversions import *
+from .decode_frames import decode_frames
+from .values import *
+from .RSC import *
+import numpy as np
+# import sounddevice as sd
+@dataclass
+class Chunk:
+	"""A batch of samples from one source stream, queued for AsyncDecodeManager."""
+	stream_id: str          # key of the per-stream SourceBuffer the samples are appended to
+	start_abs: int          # absolute sample index for this source stream
+	samples: np.ndarray     # float32
+class SourceBuffer:
+	def __init__(self, overlap: int):
+		self.total = 0
+		self.parts = deque()
+		self.overlap = overlap
+	def append(self, x: np.ndarray):
+		x = np.asarray(x, dtype=np.float32)
+		self.parts.append(x)
+		self.total += len(x)
+	def available_abs_end(self) -> int:
+		return self.total - 2 * self.overlap
+	def pop_slice(self, size) -> np.ndarray:
+		abs_start = 0
+		abs_end = size + self.overlap * 2
+		need = abs_end - abs_start
+		if need > self.total:
+			raise IndexError("slice not available yet")
+		out = np.empty(need, dtype=np.float32)
+		w = 0
+		while self.parts and w < need:
+			if len(self.parts[0]) <= need - w - 2 * self.overlap:
+				seg = self.parts.popleft()
+				out[w:w+len(seg)] = seg
+				w += len(seg)
+			else:
+				seg = self.parts[0][:need - w]
+				out[w:w+len(seg)] = seg
+				self.parts[0] = self.parts[0][need - w - 2 * self.overlap:]
+				w += len(seg)
+		self.total -= need - 2 * self.overlap
+		return out
+class AsyncDecodeManager:
+	def __init__(self, block_sec=3.0, overlap_sec=0.1):
+		self.block_samples_len = int(round(SAMPLE_RATE * block_sec))
+		self.overlap_samples_len = int(round(SAMPLE_RATE * overlap_sec))
+		self.buffer = np.array([], dtype=np.float32)
+		self.frames_payload = {}
+		self.total_expected = None
+		self.lock = threading.Lock()
+		self.rsc_error = 0
+		self.header_error = 0
+		self.crc_error = 0
+		self.duplicate_frames = 0
+		self.no_lock = 0
+		self.rsc_error = 0
+		self.header_error = 0
+		self.crc_error = 0
+		self.buffers: dict[str, SourceBuffer] = {}
+		self.next_block_start: dict[str, int] = {}
+		self.queue = queue.Queue(maxsize=2000)
+		self.executor = ThreadPoolExecutor(max_workers=32)
+		self.limit = threading.BoundedSemaphore(32)
+		self._stop = threading.Event()
+		self._thread = threading.Thread(target=self._run, daemon=True)
+		self.max_keep = 5 * self.block_samples_len + 3 * self.overlap_samples_len
+	def start(self):
+		self._stop = threading.Event()
+		self._thread.start()
+	def stop(self):
+		self._stop.set()
+		self._thread.join()
+		self.executor.shutdown(wait=True)
+	def push_chunk(self, chunk):
+		# If you have sources with different fs: resample here to target_fs (not shown)
+		self.queue.put(chunk)
+	def _ensure_stream(self, stream_id: str):
+		if stream_id not in self.buffers:
+			self.buffers[stream_id] = SourceBuffer(overlap=self.overlap_samples_len)
+			self.next_block_start[stream_id] = 0
+	def _run(self):
+		while not self._stop.is_set():
+			try:
+				chunk = self.queue.get(timeout=0.1)
+			except queue.Empty:
+				time.sleep(0.1)
+				continue
+			try:
+				self._ensure_stream(chunk.stream_id)
+				sb = self.buffers[chunk.stream_id]
+				sb.append(chunk.samples)
+				self._schedule_ready(chunk.stream_id)
+			finally:
+				self.queue.task_done()
+	def _schedule_ready(self, stream_id: str):
+		sb = self.buffers[stream_id]
+		next = self.next_block_start[stream_id]
+		while True:
+			start = max(0, next - self.overlap_samples_len)
+			end = start + self.block_samples_len + 2 * self.overlap_samples_len
+			# start = max(0, next)
+			# end = start + self.block_samples_len
+			if sb.available_abs_end() < self.block_samples_len:
+				break  # not enough samples yet
+			rx_block = sb.pop_slice(self.block_samples_len)
+			# submit decode job
+			# self.limit.acquire()
+			fut = self.executor.submit(
+				self._decode_one,
+				rx_block, stream_id, start, end
+			)
+			fut.add_done_callback(self._merge_result)
+			# advance to next valid block (no overlap in valid regions)
+			next += self.block_samples_len
+			self.next_block_start[stream_id] = next
+	def _decode_one(self, rx_block, stream_id: str, start, end):
+		frames_block, total_expected, ok_log, no_lock, rsc_error, header_error, crc_error = decode_frames(
+			rx_block, SAMPLE_RATE,
+			search_step=SAMPLES_PER_SYMBOL, quick=True
+		)
+		return (frames_block, total_expected, ok_log, no_lock, rsc_error, header_error, crc_error, stream_id, start, end)
+	def _merge_result(self, fut):
+		# self.limit.release()
+		payloads, total, ok_log, no_lock, rsc_error, header_error, crc_error, stream_id, start, end = fut.result()
+		with self.lock:
+			if total is not None and self.total_expected is None:
+				self.total_expected = total
+			for seq, payload in payloads.items():
+				self.no_lock += no_lock
+				self.rsc_error += rsc_error
+				self.header_error += header_error
+				self.crc_error += crc_error
+				if seq not in self.frames_payload:
+					self.frames_payload[seq] = payload
+				else:
+					self.duplicate_frames += 1
+			print(f"\rBlock(Sample): {end // self.block_samples_len}/{self.buffers[stream_id].available_abs_end() // self.block_samples_len}({end}/{self.buffers[stream_id].available_abs_end()})  Frames: {len(self.frames_payload)}(+{self.duplicate_frames})/{self.total_expected}  no_lock: {self.no_lock} rsc_error: {self.rsc_error} header_error: {self.header_error} crc_error: {self.crc_error}", end='')
+def feed_wav(manager: AsyncDecodeManager, stream_id: str, path: str, chunk_size=16000):
+	fs, rx = wavfile.read(path)
+	rx = rx.astype(np.float32)
+	pos = 0
+	while pos < len(rx):
+		x = rx[pos:pos+chunk_size]
+		manager.push_chunk(Chunk(stream_id=stream_id, start_abs=pos, samples=x))
+		pos += len(x)
+	manager.push_chunk(Chunk(stream_id, start_abs=pos, samples=np.zeros((manager.block_samples_len + manager.overlap_samples_len,), dtype=np.float32)))
+# Manual test driver: decode "<file>.wav" through the asynchronous manager.
+if __name__ == '__main__':
+	file = "bmp"
+	manager = AsyncDecodeManager()
+	manager.start()
+	# Disabled alternative input: live capture through sounddevice (its import is commented out above).
+	# sample_pos = 0
+	# def callback(indata, frames, time_info, status):
+	# 	if status:
+	# 		print(status, flush=True)
+	#
+	# 	global sample_pos
+	# 	manager.push_chunk(Chunk(stream_id="mic", start_abs=sample_pos, samples=indata[:, 0].copy()))
+	# 	sample_pos += indata[:, 0].shape[0]
+	#
+	# stream = sd.InputStream(samplerate=SAMPLE_RATE, channels=1, dtype='float32', blocksize=4096, callback=callback)
+	#
+	# stream.start()
+	#
+	# while True:
+	# 	time.sleep(1)
+	feed_wav(manager, "wav", f"{file}.wav")
+	print("read wav")
+	# feed_wav(manager, "wav1", f"{file}1.wav")
+	# Wait until the worker has taken every queued chunk, then stop it and the decode pool.
+	manager.queue.join()
+	manager.stop()

filejack/RSC.py ADDED Viewed

@@ -0,0 +1,50 @@
+import struct
+from reedsolo import RSCodec
+from .values import *
+# Reed-Solomon codeword layout: N bytes per codeword = K data bytes + NSYM parity bytes.
+N = 255
+NSYM = 32
+K = N - NSYM
+RSC = RSCodec(NSYM)
+# Frame header as packed by build_frames: sequence number, total frame count, payload length.
+HDR_FMT = ">IIH"
+HDR_LEN = struct.calcsize(HDR_FMT)
+CRC_LEN = 4
+# Payload bytes that remain in the K data bytes after the header and the CRC32.
+MAX_PAYLOAD = K - HDR_LEN - CRC_LEN
+def lfsr_bits(n: int, seed: int, taps=(7, 6)) -> list[int]:
+	"""
+	Simple LFSR bit generator.
+	taps are 1-indexed bit positions within the register size (max(taps)).
+	"""
+	m = max(taps)
+	mask = (1 << m) - 1
+	reg = seed & mask
+	if reg == 0:
+		reg = 1  # avoid stuck-at-zero
+	out = []
+	for _ in range(n):
+		# output bit (LSB)
+		out.append(reg & 1)
+		# feedback = XOR of tap bits
+		fb = 0
+		for t in taps:
+			fb ^= (reg >> (t - 1)) & 1
+		# shift right, insert feedback at MSB
+		reg = (reg >> 1) | (fb << (m - 1))
+		reg &= mask
+	return out
+def bits_to_steps(bits01: list[int]) -> list[int]:
+	return [PSK // 2 if b else 0 for b in bits01]
+# Fixed step sequences placed in front of every frame's payload by encode_frames.
+# Both come from the same LFSR taps and differ only in the seed.
+PREAMBLE_STEPS = bits_to_steps(lfsr_bits(PREAMBLE_SYMS, seed=0x5D, taps=(7, 6)))
+SYNCWORD_STEPS = bits_to_steps(lfsr_bits(SYNCWORD_SYMS, seed=0x6B, taps=(7, 6)))

filejack/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .conversions import bytes_to_symbols, symbols_to_bytes
+from .encode_frames import build_frames, encode_frames, encode_data
+from .decode_frames import decode_frames
+from .decode_data import decode_data
+from .merge_frames import merge_frames, save_fjf, load_fjf
+from .reconstruct_data import reconstruct_data

filejack/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .cli import main
+# Package entry point (__main__.py): run the CLI and exit with the value main() returns.
+raise SystemExit(main())

filejack/cli.py ADDED Viewed

@@ -0,0 +1,96 @@
+import argparse
+import os
+import zlib
+import numpy as np
+from scipy.io import wavfile
+from .decode_data import decode_data
+from .encode_frames import encode_data
+from .merge_frames import save_fjf
+from .reconstruct_data import reconstruct_data
+from .values import *
+def encode_command(args) -> int:
+	output = args.output or args.input + ".wav"
+	data = open(args.input, 'rb').read()
+	waveform = encode_data(data)
+	if args.mono:
+		wavfile.write(output, SAMPLE_RATE, waveform)
+	else:
+		stereo = np.column_stack([waveform, -waveform])
+		wavfile.write(output, SAMPLE_RATE, stereo)
+	print(f"Encoded {len(data)} bytes into {output}")
+	print("Input CRC32:", zlib.crc32(data))
+	return 0
+def decode_command(args) -> int:
+	output = args.output
+	if output is None:
+		base, ext = os.path.splitext(args.input)
+		output = base + ".out"
+	fs_in, rx = wavfile.read(args.input)
+	rx = rx.astype(np.float32)
+	if fs_in != SAMPLE_RATE:
+		print(f"Expected sample rate {SAMPLE_RATE}, got {fs_in}")
+		return 1
+	if rx.ndim == 2:
+		# Anti-phase stereo: subtracting the channels doubles the signal and cancels common noise
+		rx = (rx[:, 0] - rx[:, 1]) / 2
+	frames_payload, total_expected, stats = decode_data(rx, threads_num=args.threads, progress=True)
+	if total_expected is None:
+		print("No frames decoded.")
+		return 1
+	if args.fjf:
+		save_fjf(total_expected, frames_payload, args.fjf)
+		print(f"Saved {len(frames_payload)} frames into {args.fjf}")
+	data = reconstruct_data(frames_payload, total_expected)
+	open(output, 'wb').write(data)
+	print("Total frames expected:", total_expected)
+	print("Frames received:", len(frames_payload))
+	print(f"Decoded {len(data)} bytes into {output}")
+	print("Output CRC32:", zlib.crc32(data))
+	if len(frames_payload) < total_expected:
+		return 1
+	return 0
+def main() -> int:
+	parser = argparse.ArgumentParser(prog="filejack", description="File encoding/decoding to audio signals.")
+	subparsers = parser.add_subparsers(dest="command", required=True)
+	encode_parser = subparsers.add_parser("encode", help="Encode a file into a WAV audio signal.")
+	encode_parser.add_argument("input", help="Path of the file to encode.")
+	encode_parser.add_argument("output", nargs='?', default=None, help="Path of the output WAV file. Defaults to <input>.wav")
+	encode_parser.add_argument("--mono", action="store_true", help="Write a mono WAV instead of anti-phase stereo.")
+	encode_parser.set_defaults(func=encode_command)
+	decode_parser = subparsers.add_parser("decode", help="Decode a WAV audio signal back into a file.")
+	decode_parser.add_argument("input", help="Path of the WAV file to decode.")
+	decode_parser.add_argument("output", nargs='?', default=None, help="Path of the output file. Defaults to <input without extension>.out")
+	decode_parser.add_argument("--fjf", default=None, help="Also save decoded frames into a .fjf file at this path.")
+	decode_parser.add_argument("--threads", type=int, default=12, help="Number of decoding threads. Defaults to 12.")
+	decode_parser.set_defaults(func=decode_command)
+	args = parser.parse_args()
+	return args.func(args)
+if __name__ == '__main__':
+	raise SystemExit(main())

filejack/conversions.py ADDED Viewed

@@ -0,0 +1,23 @@
+import numpy as np
+from .values import *
+def bytes_to_symbols(buf: bytes) -> np.ndarray:
+	b = np.frombuffer(buf, dtype=np.uint8)
+	bits = np.unpackbits(b)
+	assert bits.size % BITS_PER_SYMBOL == 0
+	bit3 = bits.reshape(-1, BITS_PER_SYMBOL)
+	symbols = np.zeros((bit3.shape[0],), dtype=np.int32)
+	for i in range(BITS_PER_SYMBOL):
+		symbols += bit3[:, i] << (BITS_PER_SYMBOL - 1 - i)
+	return symbols.astype(np.int32)
+def symbols_to_bytes(symbols: np.ndarray) -> bytes:
+	symbols = symbols.astype(np.uint8) % DAPSK
+	bits = np.empty(symbols.size * BITS_PER_SYMBOL, dtype=np.uint8)
+	for i in range(BITS_PER_SYMBOL):
+		bits[i::BITS_PER_SYMBOL] = (symbols >> (BITS_PER_SYMBOL - 1 - i)) & 1
+	packed = np.packbits(bits)
+	return packed.tobytes()

filejack/decode_data.py ADDED Viewed

@@ -0,0 +1,70 @@
+import threading
+import numpy as np
+from .decode_frames import decode_frames
+from .values import *
+def decode_data(rx: np.ndarray, threads_num: int = 12, quick: bool = True, progress: bool = False):
+	"""
+	Decode all frames from a mono sample buffer. Splits the buffer into overlapping
+	blocks and decodes them in parallel threads.
+	Returns (frames_payload, total_expected, stats) where stats is a dict with
+	no_lock, rsc_error, header_error and crc_error counters.
+	"""
+	rx = np.asarray(rx, dtype=np.float32)
+	frames_payload = {}
+	stats = {'no_lock': 0, 'rsc_error': 0, 'header_error': 0, 'crc_error': 0}
+	total_expected = [None]
+	samples_done = [0]
+	limit = threading.BoundedSemaphore(threads_num)
+	lock = threading.Lock()
+	fs_block_size = SAMPLE_RATE * 5
+	def decode_block(block: np.ndarray):
+		try:
+			frames_block, total_expected1, ok_log1, no_lock1, rsc_error1, header_error1, crc_error1 = decode_frames(block, SAMPLE_RATE, search_step=SAMPLES_PER_SYMBOL, quick=quick)
+			with lock:
+				frames_payload.update(frames_block)
+				total_expected[0] = total_expected[0] or total_expected1
+				samples_done[0] += len(block) - int(SAMPLE_RATE * 0.2)
+				stats['no_lock'] += no_lock1
+				stats['rsc_error'] += rsc_error1
+				stats['header_error'] += header_error1
+				stats['crc_error'] += crc_error1
+				if progress:
+					print(f"\rBlock(Sample): {samples_done[0] // fs_block_size}/{len(rx) // fs_block_size}({samples_done[0]}/{len(rx)})  Frames: {len(frames_payload)}/{total_expected[0]}  no_lock: {stats['no_lock']} rsc_error: {stats['rsc_error']} header_error: {stats['header_error']} crc_error: {stats['crc_error']}", end='')
+		except Exception as e:
+			print(f"\nError in thread: {e}")
+		finally:
+			limit.release()
+	threads: list[threading.Thread] = []
+	for i in range(0, len(rx), fs_block_size):
+		start = max(0, int(i - SAMPLE_RATE * 0.1))
+		end = min(len(rx), int(i + fs_block_size + SAMPLE_RATE * 0.1))
+		block = rx[start : end]
+		thread = threading.Thread(target=decode_block, args=(block,))
+		thread.start()
+		threads.append(thread)
+		limit.acquire()
+		threads = [t for t in threads if t.is_alive()]
+	while len(threads) > 0:
+		threads[0].join()
+		threads.pop(0)
+	if progress:
+		print()
+	return frames_payload, total_expected[0], stats

filejack/decode_frames.py ADDED Viewed

@@ -0,0 +1,244 @@
+import zlib
+import numpy as np
+from reedsolo import ReedSolomonError
+from scipy.signal import hilbert
+from .RSC import *
+from .conversions import *
+from .values import *
+def decode_frames(rx, sample_rate, search_step = 1, quick = False):
+	"""
+	Demodulate a block of samples and extract every frame that passes
+	Reed-Solomon decoding, the header checks and the CRC32 check.
+	rx: 1-D sample array (assumed to be real-valued mono audio — confirm with callers).
+	sample_rate: sampling rate of rx; together with baud_rate it fixes the samples per symbol.
+	search_step: number of samples the search position advances when nothing decodes there.
+	quick: after a decoded frame, when no preamble is found near the predicted position of
+	the next frame, jump straight to that prediction instead of advancing by search_step.
+	Returns (frames_payload, total_expected, ok_log, no_lock, rsc_error, header_error, crc_error):
+	frames_payload maps sequence number -> payload, total_expected is the frame count from
+	the first accepted header (None if no frame decoded), ok_log lists
+	(seq, frame_start_sample, score) per decoded frame and the rest are counters.
+	"""
+	samples_per_symbol = int(round(sample_rate / baud_rate))
+	# sos_bp = butter(6, [carrier_freq - baud_rate * 2, carrier_freq + baud_rate * 2], btype='bandpass', fs=sample_rate, output='sos')
+	# rx = sosfiltfilt(sos_bp, rx).astype(np.float32)
+	# Zero-pad before the Hilbert transform and cut the padding off again afterwards
+	# (presumably to keep edge effects out of the block — TODO confirm).
+	pad_len = 2048
+	rx_padded = np.pad(rx, (pad_len, pad_len), 'constant')
+	analytic_padded = hilbert(rx_padded)
+	analytic = analytic_padded[pad_len:-pad_len]
+	num_symbols = int(len(rx) // samples_per_symbol)
+	n = np.arange(len(rx), dtype=np.int64)
+	t = n / sample_rate
+	# Multiply by exp(-j*2*pi*carrier_freq*t) to shift the carrier down to 0 Hz.
+	bb = analytic * np.exp(-1j * 2*np.pi * carrier_freq * t)
+	# cut = 3500.0
+	# sos_lp = butter(6, cut, btype='lowpass', fs=sample_rate, output='sos')
+	# bb = sosfiltfilt(sos_lp, bb.real).astype(np.float32) + 1j * sosfiltfilt(sos_lp, bb.imag).astype(np.float32)
+	def symbols_from(bb, start_sample, offset, symbols_num):
+		# Sum the baseband samples of each symbol period; None when the range runs past the end of bb.
+		start = start_sample + offset
+		need = start + symbols_num * samples_per_symbol
+		if need > len(bb):
+			return None
+		symbol = bb[start : start + symbols_num * samples_per_symbol].reshape(symbols_num, samples_per_symbol).sum(axis=1)
+		# phase = np.unwrap(np.angle(symbol))
+		# phase = np.angle(symbol)
+		# diff = np.diff(phase)
+		# diff = np.mod(diff, 2 * np.pi)
+		# diff = np.round(diff * PSK / 2 / np.pi).astype(np.int32) % PSK
+		return symbol
+	def steps_from(bb, start_sample, offset, n_symbols):
+		# Phase change between consecutive symbols, rounded to one of PSK steps.
+		symbol = symbols_from(bb, start_sample, offset, n_symbols)
+		phase = np.angle(symbol[1:] * np.conj(symbol[:-1]))
+		diff = np.rint(np.mod(phase, 2*np.pi) * PSK/(2*np.pi)).astype(np.int32) % PSK
+		return diff
+	# Differential steps of the whole block, precomputed once for every sample offset within a symbol.
+	steps_by_offset = [steps_from(bb, 0, offset, (len(bb) - offset) // samples_per_symbol) for offset in range(samples_per_symbol)]
+	pattern = np.array(PREAMBLE_STEPS[1:] + SYNCWORD_STEPS, dtype=np.int32)
+	pattern_len = len(pattern)
+	PAYLOAD_STEPS_LEN = (N * 8) // BITS_PER_SYMBOL
+	frames_payload = {}
+	total_expected = None
+	# A window counts as a preamble/syncword match when at least 90% of its steps agree.
+	min_match = int(0.9 * pattern_len)
+	no_lock = 0
+	rsc_error = 0
+	header_error = 0
+	crc_error = 0
+	ok_log = []
+	i = 0
+	while i + (pattern_len + PAYLOAD_STEPS_LEN + 1) * samples_per_symbol <= len(bb):
+		# Collect every symbol offset whose window at position i matches the pattern well enough.
+		cands = []
+		for offset in range(samples_per_symbol):
+			# if i < offset:
+			# 	continue
+			start = max((i - offset) // samples_per_symbol, 0)
+			end = start + (pattern_len + PAYLOAD_STEPS_LEN + 1)
+			if end > len(steps_by_offset[offset]):
+				continue
+			steps =  steps_by_offset[offset][start : end]
+			# steps = steps_from(bb, i, offset, pattern_len + PAYLOAD_STEPS_LEN + 1)
+			# if steps is None:
+			# 	continue
+			window = steps[:pattern_len]
+			delta = (window - pattern) % PSK
+			# r is the most frequent difference to the pattern; the score counts the steps that share it.
+			r = np.bincount(delta, minlength=PSK).argmax()
+			score = np.sum(delta == r)
+			if score >= min_match:
+				cands.append((score, offset, steps, start, r))
+		if not cands:
+			no_lock += 1
+			i += search_step
+			continue
+		# Try the candidates from best to worst score until one decodes cleanly.
+		cands.sort(key=lambda x: x[0], reverse=True)
+		decoded_ok = False
+		for best_score, best_off, best_steps, best_start_idx, rotation in cands:
+			# Extract payload steps immediately after pattern
+			payload_steps = best_steps[pattern_len:pattern_len + PAYLOAD_STEPS_LEN]
+			payload_steps = (payload_steps - rotation) % PSK
+			symbols = symbols_from(bb, best_start_idx * samples_per_symbol, best_off, pattern_len + PAYLOAD_STEPS_LEN + 1)
+			# Use the median magnitude over the pattern symbols as the amplitude reference.
+			gain = np.median(abs(symbols[2 : pattern_len + 1])) + 1e-12 	# epsilon - avoid div by zero
+			a_hat = abs(symbols[pattern_len + 1: pattern_len + 1 + PAYLOAD_STEPS_LEN]) / gain
+			a_hat = np.clip(a_hat, 0.0, 1.0)
+			# Pick the nearest ASK level for each payload symbol.
+			d = np.abs(a_hat[:, None] - ASK_LEVELS[None, :])
+			amplitudes = np.argmin(d, axis=1)
+			combined = (amplitudes.astype(np.uint8) << PSK_BITS_PER_SYMBOL) | payload_steps.astype(np.uint8)
+			cw = symbols_to_bytes(combined)
+			# RS decode
+			try:
+				decoded = RSC.decode(cw)[0]
+			except ReedSolomonError:
+				# print("\rReed-Solomon decoding failed, skipping unusable frame.")
+				# print(f"\rMalformed frame: {seq}/{total}  {payload_len}/{MAX_PAYLOAD}")
+				rsc_error += 1
+				continue
+			try:
+				hdr = decoded[:HDR_LEN]
+				seq, total, payload_len = struct.unpack(HDR_FMT, hdr)
+				payload = decoded[HDR_LEN:HDR_LEN + payload_len]
+				# Reject impossible lengths and headers that disagree with the total seen earlier in this block.
+				if payload_len > MAX_PAYLOAD or (total_expected is not None and (total != total_expected or seq >= total_expected)):
+					header_error += 1
+					continue
+			except struct.error:
+				header_error += 1
+				continue
+			try:
+				crc_recv = struct.unpack('>I', decoded[HDR_LEN + payload_len:HDR_LEN + payload_len + CRC_LEN])[0]
+			except struct.error:
+				print(f"\rFrame CRC unpacking failed: {seq}/{total_expected}")
+				print(decoded[HDR_LEN + payload_len:HDR_LEN + payload_len + CRC_LEN])
+				crc_error += 1
+				continue
+			crc_calc = zlib.crc32(hdr + payload) & 0xFFFFFFFF
+			if crc_recv != crc_calc:
+				print(f"\rCRC mismatch for frame {seq}: received {crc_recv}, calculated {crc_calc}")
+				crc_error += 1
+				continue
+			decoded_ok = True
+			break
+		if not decoded_ok:
+			i += search_step
+			continue
+		else:
+			try:
+				frames_payload[seq] = payload
+				total_expected = total_expected or total
+				frame_steps = pattern_len + PAYLOAD_STEPS_LEN + 1
+				frame_samples = frame_steps * samples_per_symbol
+				frame_start_sample = best_off + best_start_idx * samples_per_symbol
+				# Nominal extra samples between frames, used until a spacing can be measured.
+				extra = 72
+				if len(ok_log) >= 2:
+					last_seq, last_start = ok_log[-1][0], ok_log[-1][1]
+					if last_seq < seq:  # Normal forward progression
+						# Calculate actual spacing from last decoded frame
+						delta_seq = seq - last_seq
+						delta_samples = frame_start_sample - last_start
+						actual_spacing = delta_samples / delta_seq  # samples per frame
+						# Predict next frame using observed spacing
+						expected = frame_start_sample + int(round(actual_spacing))
+					else:
+						# Fallback to nominal
+						expected = frame_start_sample + frame_samples + extra
+				else:
+					expected = frame_start_sample + frame_samples + extra
+				# Search window of +/- 16 symbols around the predicted start of the next frame.
+				W = 16 * samples_per_symbol  # try 4..8 symbols worth, start with 48 samples
+				best_local_score = -1
+				best_local_start = None
+				best_local_off = None
+				best_local_idx = None
+				best_local_rot = None
+				for cand_start_sample in range(expected - W, expected + W + 1):
+					if cand_start_sample < 0:
+						continue
+					off = cand_start_sample % samples_per_symbol
+					start_idx2 = cand_start_sample // samples_per_symbol
+					end2 = start_idx2 + (pattern_len + PAYLOAD_STEPS_LEN + 1)
+					if end2 > len(steps_by_offset[off]):
+						continue
+					steps2 = steps_by_offset[off][start_idx2:end2]
+					window2 = steps2[:pattern_len]
+					delta2 = (window2 - pattern) % PSK
+					r2 = np.bincount(delta2, minlength=PSK).argmax()
+					score2 = np.sum(delta2 == r2)
+					if score2 > best_local_score:
+						best_local_score = score2
+						best_local_start = cand_start_sample
+						best_local_off = off
+						best_local_idx = start_idx2
+						best_local_rot = r2
+				# snap if we found a good preamble near expected
+				if best_local_score >= min_match:
+					i = best_local_start
+				else:
+					if quick:
+						i = expected
+					else:
+						i += search_step
+				# ok_log.append((seq, frame_start_sample, expected, best_off, best_start_idx, best_score, rotation))
+				ok_log.append((seq, frame_start_sample, best_score))
+			except ValueError as e:
+				print(f"\rFrame parsing failed: {e}")
+				i += search_step
+	return frames_payload, total_expected, ok_log, no_lock, rsc_error, header_error, crc_error

filejack/encode_frames.py ADDED Viewed

@@ -0,0 +1,81 @@
+import struct
+import zlib
+import numpy as np
+from .values import *
+from .conversions import *
+from .RSC import *
+def build_frames(data: bytes) -> list[bytearray]:
+	chunks = [data[i:i + MAX_PAYLOAD] for i in range(0, len(data), MAX_PAYLOAD)]
+	frames = []
+	for seq, payload in enumerate(chunks):
+		hdr = struct.pack(HDR_FMT, seq, len(chunks), len(payload))
+		crc = zlib.crc32(hdr + payload) & 0xFFFFFFFF
+		crc_b = struct.pack('>I', crc)
+		frame = hdr + payload + crc_b
+		frame += b"\x00" * (K - len(frame))
+		codeword = RSC.encode(frame)
+		frames.append(codeword)
+	return frames
+def encode_frames(frames: list) -> np.ndarray:
+	if not frames:
+		return np.zeros(0, dtype=np.int16)
+	cursor = 0
+	all_samples = []
+	for cw in frames:
+		payload_steps = bytes_to_symbols(cw)
+		steps = np.concatenate([PREAMBLE_STEPS, SYNCWORD_STEPS, payload_steps % PSK])
+		steps = np.array(steps, dtype=np.int32)
+		diff = np.cumsum(steps) % PSK
+		amp_idx = (payload_steps / PSK).astype(np.int32)
+		amplitudes = ASK_LEVELS[amp_idx]
+		amplitudes = np.concatenate([np.full(len(PREAMBLE_STEPS), 1.0), np.full(len(SYNCWORD_STEPS), 1.0), amplitudes], dtype=np.float32)
+		phase_signal = np.repeat(diff, SAMPLES_PER_SYMBOL)
+		amplitudes = np.repeat(amplitudes, SAMPLES_PER_SYMBOL)
+		total_samples = len(phase_signal)
+		# Optional tiny fade-in to reduce click at frame boundary
+		fade_len = 2 * SAMPLES_PER_SYMBOL
+		ramp = np.linspace(0.0, 1.0, fade_len, endpoint=False, dtype=np.float32)
+		t_in = (np.arange(fade_len, dtype=np.int64) + cursor) / float(SAMPLE_RATE)
+		fade_in = float(amplitude) * np.cos(2 * np.pi * (carrier_freq * t_in + phase_signal[0] / PSK)) * ramp
+		cursor += fade_len
+		t = np.arange(total_samples, dtype=np.int64) + cursor
+		t = t / float(SAMPLE_RATE)
+		waveform = float(amplitude) * np.cos(2 * np.pi * (carrier_freq * t + phase_signal / PSK)) * amplitudes
+		cursor += total_samples
+		t_out = (np.arange(fade_len, dtype=np.int64) + cursor) / float(SAMPLE_RATE)
+		fade_out = float(amplitude) * np.cos(2 * np.pi * (carrier_freq * t_out + phase_signal[-1] / PSK)) * ramp[::-1]
+		cursor += fade_len
+		guard = np.zeros(fade_len, dtype=np.float32)
+		cursor += fade_len
+		waveform = np.concatenate([fade_in, waveform, fade_out, guard])
+		all_samples.append(waveform)
+	waveform = np.concatenate(all_samples)
+	return np.clip(np.rint(waveform), -32768, 32767).astype(np.int16)
+def encode_data(data: bytes) -> np.ndarray:
+	return encode_frames(build_frames(data))

filejack/examples/__init__.py ADDED Viewed

File without changes

filejack/examples/decoder.py ADDED Viewed

@@ -0,0 +1,31 @@
+import zlib
+from scipy.io import wavfile
+import numpy as np
+from filejack.decode_data import decode_data
+from filejack.reconstruct_data import reconstruct_data
+from filejack.merge_frames import save_fjf
+from filejack.values import *
+file = "7z6"
+fs_in, rx = wavfile.read(f"{file}.wav")
+rx = rx.astype(np.float32)
+assert fs_in == SAMPLE_RATE, f"Expected sample rate {SAMPLE_RATE}, got {fs_in}"
+if rx.ndim == 2:
+	rx = (rx[:, 0] - rx[:, 1]) / 2
+frames_payload, total_expected, stats = decode_data(rx, threads_num=12, progress=True)
+save_fjf(total_expected, frames_payload, f"{file}.fjf")
+data = reconstruct_data(frames_payload, total_expected)
+open(f"out.{file}", 'wb').write(data)
+print("Total frames expected:", total_expected)
+print("Frames received:", len(frames_payload))
+print("Output CRC32: ", zlib.crc32(data))

filejack/examples/encoder.py ADDED Viewed

@@ -0,0 +1,18 @@
+import zlib
+import numpy as np
+from scipy.io import wavfile
+from filejack.encode_frames import encode_data
+from filejack.values import *
+file = "bmp"
+data = open(f"in.{file}", 'rb').read()
+waveform = encode_data(data)
+stereo = np.column_stack([waveform, -waveform])
+wavfile.write(f"{file}.wav", SAMPLE_RATE, stereo)
+print(zlib.crc32(data))

filejack/examples/merge.py ADDED Viewed

@@ -0,0 +1,24 @@
+from filejack.merge_frames import merge_frames, load_fjf, save_fjf
+from filejack.reconstruct_data import reconstruct_data
+def main():
+	path1 = "merged.fjf"
+	path2 = "decode3.fjf"
+	output_path = "merged.fjf"
+	total_expected, frames1 = load_fjf(path1)
+	total_expected, frames2 = load_fjf(path2)
+	merged_frames = merge_frames(total_expected, [frames1, frames2])
+	# merged_frames = merge_frames([frames2, frames1])  # Try reversing order to prefer frames from second file
+	save_fjf(total_expected, merged_frames, output_path)
+	print(f"Merged {len(frames1)} frames from {path1} and {len(frames2)} frames from {path2} into {len(merged_frames)} frames in {output_path}")
+	data = reconstruct_data(merged_frames, total_expected)
+	open("reconstructed.7z", "wb").write(data)
+if __name__ == '__main__':
+	main()

filejack/examples/stereo_to_mono.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Convert a stereo WAV file to mono by subtracting the right channel from the left channel. Should be used with encoder/decoder to cancel out the noise in stereo-jack transfers.
+"""
+import numpy as np
+from scipy.io import wavfile
+def main():
+	sample_rate, rx = wavfile.read(f"stereo.wav")
+	stereo = np.array(rx).astype(np.float32)
+	mono = stereo[:, 0] - stereo[:, 1]
+	mono = mono / 2
+	mono = np.clip(np.rint(mono), -32768, 32767).astype(np.int16)
+	wavfile.write(f"mono.wav", sample_rate, mono)
+if __name__ == '__main__':
+	main()

filejack/merge_frames.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""
+A .fjf (FileJack Frames) file contains multiple decoded frames with metadata. It can be used to merge results from different decoding attempts.
+"""
+import struct
+# Magic bytes written at the start of every .fjf file; load_fjf rejects files that do not begin with them.
+FILEJACK_FJF_HEADER = b'FJF0917\nhttps://github.com/Staheos/filejack\n1.0.0'
+def merge_frames(total_expected: int, frames_inputs: list[dict[int, bytes | bytearray | list[int]]]):
+	frames_output = {}
+	for frames_input in frames_inputs:
+		for seq, payload in frames_input.items():
+			if seq not in frames_output:
+				frames_output[seq] = payload
+	return frames_output
+def save_fjf(total_expected: int, frames: dict[int, bytes | bytearray | list[int]], filename: str):
+	with open(filename, 'wb') as f:
+		f.write(FILEJACK_FJF_HEADER)
+		f.write(struct.pack('>I', total_expected))
+		f.write(struct.pack('>I', len(frames)))
+		for seq in sorted(frames.keys()):
+			payload = frames[seq]
+			if isinstance(payload, list):
+				payload = bytes(payload)
+			payload_len = len(payload)
+			f.write(struct.pack('>I', seq))  # Frame sequence number
+			f.write(struct.pack('>I', payload_len))  # Payload length
+			f.write(payload)  # Payload data
+def load_fjf(filename: str) -> tuple[int, dict[int, bytes]]:
+	frames = {}
+	with open(filename, 'rb') as f:
+		# Read header
+		header = f.read(len(FILEJACK_FJF_HEADER))
+		if header != FILEJACK_FJF_HEADER:
+			raise ValueError("Invalid FJF file format")
+		total_expected_bytes = f.read(4)
+		total_expected = struct.unpack('>I', total_expected_bytes)[0]
+		num_frames_bytes = f.read(4)
+		num_frames = struct.unpack('>I', num_frames_bytes)[0]
+		for _ in range(num_frames):
+			seq_bytes = f.read(4)
+			payload_len_bytes = f.read(4)
+			seq = struct.unpack('>I', seq_bytes)[0]
+			payload_len = struct.unpack('>I', payload_len_bytes)[0]
+			payload = f.read(payload_len)
+			frames[seq] = payload
+	return total_expected, frames

filejack/reconstruct_data.py ADDED Viewed

@@ -0,0 +1,12 @@
+from .RSC import MAX_PAYLOAD
+def reconstruct_data(frames_payload, frames_num) -> bytes:
+	data = bytearray()
+	for i in range(0, frames_num):
+		if i not in frames_payload:
+			print(f"Missing frame {i}")
+			data.extend(b'\x00' * MAX_PAYLOAD)
+			continue
+		data.extend(frames_payload[i])
+	return bytes(data)

filejack/values.py ADDED Viewed

@@ -0,0 +1,32 @@
+import math
+import numpy as np
+# Number of phase steps of the differential PSK modulation; must be a power of two.
+PSK = 8
+PSK_BITS_PER_SYMBOL = int(math.log2(PSK))
+assert 2 ** PSK_BITS_PER_SYMBOL == PSK
+# Number of amplitude levels; must be a power of two. With 1 ring the amplitude carries no bits.
+ASK_RINGS = 1
+ASK_BITS_PER_SYMBOL = int(math.log2(ASK_RINGS))
+assert 2 ** ASK_BITS_PER_SYMBOL == ASK_RINGS
+if ASK_RINGS == 1:
+	ASK_LEVELS = np.array([1])
+else:
+	ASK_LEVELS = np.linspace(0.4, 1.0, ASK_RINGS, dtype=np.float32)
+# Size of the combined symbol alphabet and the number of bits one symbol carries.
+DAPSK = PSK * ASK_RINGS
+BITS_PER_SYMBOL = PSK_BITS_PER_SYMBOL + ASK_BITS_PER_SYMBOL
+SAMPLE_RATE = 48000
+# Symbols per second; SAMPLE_RATE / baud_rate gives the samples per symbol.
+baud_rate = 4000
+# Peak sample value of the encoded waveform: 80% of 32767.
+amplitude = int(32767 * 0.8)
+carrier_freq = 12000
+SAMPLES_PER_SYMBOL = int(round(SAMPLE_RATE / baud_rate))
+# Lengths, in symbols, of the LFSR sequences used as preamble and syncword.
+PREAMBLE_SYMS = 64
+SYNCWORD_SYMS = 32

filejack-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,150 @@
+Metadata-Version: 2.4
+Name: filejack
+Version: 1.0.0
+Summary: File encoding/decoding to audio signals.
+Author: Staheos
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/Staheos/FileJack
+Project-URL: Source, https://github.com/Staheos/FileJack
+Project-URL: Issues, https://github.com/Staheos/FileJack/issues
+Keywords: file,audio,jack,encoder,decoder
+Classifier: Development Status :: 3 - Alpha
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.15.0
+Requires-Dist: scipy>=1.0.0
+Requires-Dist: reedsolo>=1.0.0
+Provides-Extra: test
+Requires-Dist: pytest>=8.0; extra == "test"
+Requires-Dist: pytest-cov; extra == "test"
+Requires-Dist: pytest-xdist[psutil]>=3.5; extra == "test"
+Dynamic: license-file
+# FileJack
+File encoding/decoding to audio signals.
+FileJack turns an arbitrary file into a WAV audio signal and back. The data is
+carried on a 12 kHz carrier using differential 8-PSK modulation, protected with
+Reed-Solomon error correction and a per-frame CRC. It is meant for sending files
+over an audio link (for example a 3.5 mm jack cable or an acoustic channel).
+## How it works
+The encoder splits the input into frames. Each frame carries a header
+(sequence number, total frame count, payload length), the payload and a CRC32,
+then gets Reed-Solomon encoded into a 255-byte codeword (223 data bytes + 32
+parity bytes; the header and CRC32 share the 223 data bytes with the payload,
+leaving up to 209 payload bytes per frame). Each codeword is mapped to
+symbols and modulated onto the carrier, preceded by a preamble and a syncword so
+the decoder can find frame boundaries.
+The decoder demodulates the recording, locks onto the preamble/syncword,
+recovers the symbols, runs Reed-Solomon correction and checks each frame's CRC.
+Valid frames are collected by sequence number and reassembled into the original
+file. Decoding runs across multiple threads over overlapping blocks, and each
+frame is validated on its own, so a partial or noisy recording still yields
+whatever frames are intact.
+Signal parameters live in [filejack/values.py](filejack/values.py): 48 kHz
+sample rate, 4000 baud, 12 kHz carrier, 8-PSK.
+## Installation
+```
+pip install filejack
+```
+Or from source:
+```
+pip install .
+```
+Dependencies: numpy, scipy, reedsolo.
+## Command line usage
+Encoding writes a WAV file. By default it produces an anti-phase stereo signal
+(the right channel is the inverted left channel), which lets the decoder cancel
+common-mode noise by subtracting the channels.
+```
+filejack encode input.bin
+filejack encode input.bin output.wav
+filejack encode input.bin output.wav --mono
+```
+Decoding reads a WAV file and writes the reconstructed file. Stereo input is
+folded down automatically.
+```
+filejack decode output.wav
+filejack decode output.wav recovered.bin
+```
+Decode options:
+- `--fjf PATH` also saves the decoded frames into a `.fjf` file. This lets you
+  merge frames recovered from several recordings of the same transmission.
+- `--threads N` sets the number of decoding threads (default 12).
+The command is also available as `python -m filejack`.
+## Library usage
+```python
+import zlib
+from scipy.io import wavfile
+import numpy as np
+from filejack.encode_frames import encode_data
+from filejack.decode_data import decode_data
+from filejack.reconstruct_data import reconstruct_data
+from filejack.values import SAMPLE_RATE
+data = open("input.bin", "rb").read()
+# Encode to samples and write a WAV
+waveform = encode_data(data)
+wavfile.write("output.wav", SAMPLE_RATE, waveform)
+# Read a WAV and decode
+fs, rx = wavfile.read("output.wav")
+rx = rx.astype(np.float32)
+if rx.ndim == 2:
+	rx = (rx[:, 0] - rx[:, 1]) / 2
+frames_payload, total_expected, stats = decode_data(rx)
+recovered = reconstruct_data(frames_payload, total_expected)
+assert zlib.crc32(recovered) == zlib.crc32(data)
+```
+More runnable scripts are in [filejack/examples](filejack/examples), including
+`stereo_to_mono.py` for folding an anti-phase stereo recording down to mono.
+## .fjf files
+A `.fjf` (FileJack Frames) file stores decoded frames with their sequence
+numbers. Because each frame is independent, you can decode the same
+transmission several times, save each attempt to a `.fjf`, and merge them to
+fill in frames that were missed in individual passes. See
+[filejack/merge_frames.py](filejack/merge_frames.py) and the merge example.
+## Development
+```
+pip install .[test]
+pytest
+```
+## License
+MIT. See [LICENSE](LICENSE).

filejack-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+filejack/AsyncDecodeManager.py,sha256=g7-omESfySca1zNjFillfJ4AChBatExqd8UiLXwEPMk,6667
+filejack/RSC.py,sha256=qD7XCWE82uJHTl6mjcKks-NUToxZZG88zjHydc6MOx8,1023
+filejack/__init__.py,sha256=ZVeR32bA--Dy2akb_i2kGeskFSLR3n9TqKs4SK1bp3Y,312
+filejack/__main__.py,sha256=D31U8_ux95qF64EQ8ReT25nabzxh7ped5avpobXz_bM,49
+filejack/cli.py,sha256=y3EKLSLQpanJ0lQQuLlO9zXZHBLWNSbXacNpuhGbvSU,3142
+filejack/conversions.py,sha256=rp0t9ZHTUJsMG9hvkYDHMcESaGfib1v1oaOjggRfckg,736
+filejack/decode_data.py,sha256=zsBqw80kpDUhkb3dvLfvQ5TbNIKHXAYeKzMxMKMc9RM,2263
+filejack/decode_frames.py,sha256=QvTw7nkc7kHVPFn8793-kcCiA4XrM2DeC3FPn6fyaQM,7587
+filejack/encode_frames.py,sha256=SW-uM7yjtAZMbsN5QReDXzGuGAdCT5z8SjHifBZTDXo,2468
+filejack/merge_frames.py,sha256=lfb86_hYjmFO0QjtQ_qbrGl7EqmgSezGrgr_yioAzuw,1827
+filejack/reconstruct_data.py,sha256=AlD887NndfYXHui2Hp4D69X_2GzNUHIOM57a2N100jc,305
+filejack/values.py,sha256=MOe7cRg8jNACeOe-e5tIzK2kmhIz_kBkdocXR4RiIgg,622
+filejack/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+filejack/examples/decoder.py,sha256=25bYNzR53FWyjA7ls81V6FQa7kMe_c2NOgrWmvhs4Dk,827
+filejack/examples/encoder.py,sha256=k5tO3xe72lC703oUKT7-RY92EvKIj1m7hP7lZqJ89jg,347
+filejack/examples/merge.py,sha256=7lNOBo5WNgDRaSNrU1blA5dWgzDZZ0IZcfQbSZh4N_0,807
+filejack/examples/stereo_to_mono.py,sha256=PBgPaiDwSTPyMjAJ18b7XmkFyEAgmdXe05SV5kQS7B0,539
+filejack-1.0.0.dist-info/licenses/LICENSE,sha256=C-SikHBZl70gMFwjvC9EqIKe4ki3pK0QM3L4Avm1MD8,1064
+filejack-1.0.0.dist-info/METADATA,sha256=It0PBjUtDg-hxaPMXi1htnEpgmGnSbDfPfLJJ2vGOJg,4750
+filejack-1.0.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
+filejack-1.0.0.dist-info/entry_points.txt,sha256=sotGg7_o1BuSJOl4llcQ2ypWQ9bhFntHMHfZlyF4ocQ,47
+filejack-1.0.0.dist-info/top_level.txt,sha256=67ReEJaubGZkfcf0WlBmxj3zoLXwoppOd-7AzbgqtlM,9
+filejack-1.0.0.dist-info/RECORD,,

filejack-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (83.0.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

filejack-1.0.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+filejack = filejack.cli:main

filejack-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Staheos
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

filejack-1.0.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+filejack