parcae 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parcae might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parcae
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Infer daily rhythm and sleep schedule from message timestamps
5
5
  Project-URL: Homepage, https://github.com/jeremyctrl/parcae
6
6
  Project-URL: Repository, https://github.com/jeremyctrl/parcae
@@ -102,8 +102,17 @@ parcae user_timestamps.csv
102
102
  ~ inferred timezone: UTC+3
103
103
 
104
104
  + typical schedule:
105
- - sleep: 02:46 -> 11:38 (≈ 8h 45m)
106
- - awake: 11:38 -> 02:46
105
+ - sleep: 23:52 -> 06:34 (≈ 8h 30m)
106
+ - awake: 06:34 -> 23:52
107
+ - variability: ±175m
108
+
109
+ + activity profile (24h):
110
+ ▁▁▁▁▁▁▁▁▅▇▅█▆▁▅▄▅▆▁▇▇▆▆▇
111
+ | | | |
112
+ 00 06 12 18
113
+
114
+ + fingerprint:
115
+ parcae:v1:AAAAAAAAAAAAAAAAAAAAAD0AWQA6AGMAQQAAADoAMAA6AEcAAABWAFUATgBMAFsAd__-D9QPqP12BPEBqwU=
107
116
 
108
117
  ~ based on 30 days of data
109
118
  ~ bin size: 15 minutes
@@ -85,8 +85,17 @@ parcae user_timestamps.csv
85
85
  ~ inferred timezone: UTC+3
86
86
 
87
87
  + typical schedule:
88
- - sleep: 02:46 -> 11:38 (≈ 8h 45m)
89
- - awake: 11:38 -> 02:46
88
+ - sleep: 23:52 -> 06:34 (≈ 8h 30m)
89
+ - awake: 06:34 -> 23:52
90
+ - variability: ±175m
91
+
92
+ + activity profile (24h):
93
+ ▁▁▁▁▁▁▁▁▅▇▅█▆▁▅▄▅▆▁▇▇▆▆▇
94
+ | | | |
95
+ 00 06 12 18
96
+
97
+ + fingerprint:
98
+ parcae:v1:AAAAAAAAAAAAAAAAAAAAAD0AWQA6AGMAQQAAADoAMAA6AEcAAABWAFUATgBMAFsAd__-D9QPqP12BPEBqwU=
90
99
 
91
100
  ~ based on 30 days of data
92
101
  ~ bin size: 15 minutes
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "0.2.0"
2
2
 
3
3
  from .api import Parcae
4
4
 
@@ -51,6 +51,22 @@ def _viterbi(obs, log_trans, log_emit, log_init):
51
51
  return path, best
52
52
 
53
53
 
54
+ def _parse_timestamps(timestamps):
55
+ out = []
56
+ for t in timestamps:
57
+ if isinstance(t, datetime):
58
+ out.append(t)
59
+ else:
60
+ out.append(datetime.fromisoformat(str(t)))
61
+ return sorted(out)
62
+
63
+
64
+ def _downsample(x, k):
65
+ n = len(x)
66
+ idx = np.linspace(0, n, k + 1, dtype=int)
67
+ return np.array([x[idx[i] : idx[i + 1]].mean() for i in range(k)], dtype=np.float32)
68
+
69
+
54
70
  class Parcae:
55
71
  def __init__(self, model_path=None, bin_minutes=15):
56
72
  if model_path is None:
@@ -72,15 +88,6 @@ class Parcae:
72
88
  self.sleep_state = int(np.argmin(self.emissionprob[:, 1]))
73
89
  self.awake_state = 1 - self.sleep_state
74
90
 
75
- def _parse_timestamps(self, timestamps):
76
- out = []
77
- for t in timestamps:
78
- if isinstance(t, datetime):
79
- out.append(t)
80
- else:
81
- out.append(datetime.fromisoformat(str(t)))
82
- return sorted(out)
83
-
84
91
  def _bin(self, timestamps):
85
92
  start = timestamps[0].replace(hour=0, minute=0, second=0, microsecond=0)
86
93
  end = timestamps[-1].replace(
@@ -100,7 +107,7 @@ class Parcae:
100
107
  return start, bins
101
108
 
102
109
  def analyze(self, timestamps, tz_range=range(-12, 13)):
103
- ts = self._parse_timestamps(timestamps)
110
+ ts = _parse_timestamps(timestamps)
104
111
 
105
112
  span = ts[-1] - ts[0]
106
113
  if span < timedelta(days=2): # arbitrary number that seems fine
@@ -134,6 +141,13 @@ class Parcae:
134
141
  shift_bins = int(best_phi * bins_per_day / 24)
135
142
  best_bins = np.roll(bins, shift_bins)
136
143
 
144
+ days = len(best_bins) // bins_per_day
145
+ day_matrix = best_bins[: days * bins_per_day].reshape(days, bins_per_day)
146
+
147
+ profile = day_matrix.mean(axis=0)
148
+ profile = profile / (profile.sum() + 1e-8)
149
+ profile_24h = _downsample(profile, 24)
150
+
137
151
  states, _ = _viterbi(
138
152
  best_bins, self.log_transmat, self.log_emissionprob, self.log_startprob
139
153
  )
@@ -158,6 +172,29 @@ class Parcae:
158
172
  else:
159
173
  awake_blocks.append((block_start, len(states)))
160
174
 
175
+ sleep_durations = [(b - a) * self.bin_minutes for a, b in sleep_blocks]
176
+
177
+ if sleep_durations:
178
+ dur = np.array(sleep_durations, dtype=np.float32)
179
+ sleep_stats = np.array([dur.mean(), dur.std(), np.median(dur)]) / 1440.0
180
+ else:
181
+ sleep_stats = np.zeros(3, dtype=np.float32)
182
+
183
+ if sleep_blocks:
184
+ starts = np.array([a for a, _ in sleep_blocks]) * self.bin_minutes
185
+ ends = np.array([b for _, b in sleep_blocks]) * self.bin_minutes
186
+
187
+ start_m = starts.mean()
188
+ end_m = ends.mean()
189
+
190
+ def circ(m):
191
+ ang = 2 * np.pi * m / 1440.0
192
+ return np.sin(ang), np.cos(ang)
193
+
194
+ sleep_phase = np.array([*circ(start_m), *circ(end_m)], dtype=np.float32)
195
+ else:
196
+ sleep_phase = np.zeros(4, dtype=np.float32)
197
+
161
198
  def blocks_to_time(blocks):
162
199
  out = []
163
200
  for a, b in blocks:
@@ -170,4 +207,8 @@ class Parcae:
170
207
  "timezone_offset_hours": int(best_phi),
171
208
  "sleep_blocks": blocks_to_time(sleep_blocks),
172
209
  "awake_blocks": blocks_to_time(awake_blocks),
210
+ "profile_24h": profile_24h,
211
+ "sleep_phase": sleep_phase,
212
+ "sleep_stats": sleep_stats.astype(np.float32),
213
+ "days": int(days),
173
214
  }
@@ -0,0 +1,170 @@
1
+ import argparse
2
+ import base64
3
+ import csv
4
+ import math
5
+
6
+ import numpy as np
7
+
8
+ from parcae import Parcae
9
+
10
+
11
def parse_csv(path):
    """Read the ``timestamp`` column of a CSV file.

    Args:
        path: Path of the CSV file. Its header row must contain a
            ``timestamp`` column.

    Returns:
        The non-empty ``timestamp`` cells, as strings, in file order.

    Raises:
        ValueError: If the file has no header row or no ``timestamp`` column.
    """
    # "utf-8-sig" drops the byte-order mark some spreadsheet exports prepend;
    # with plain "utf-8" the first header would not compare equal to
    # "timestamp". newline="" is what the csv module asks for so that line
    # breaks inside quoted fields are handled by the reader itself.
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames
        if fieldnames is None or "timestamp" not in fieldnames:
            raise ValueError("! CSV must have a 'timestamp' column")

        # Rows shorter than the header yield None for the missing cells;
        # skip those and blank cells instead of passing them downstream.
        return [
            value
            for row in reader
            if (value := row["timestamp"]) is not None and value.strip()
        ]
23
+
24
+
25
def minutes_since_midnight(dt):
    """Return the number of whole minutes elapsed in the day at ``dt``.

    Only the ``hour`` and ``minute`` attributes are read; seconds and
    smaller units are ignored.
    """
    hours, minutes = dt.hour, dt.minute
    return hours * 60 + minutes
27
+
28
+
29
def format_hm(minutes):
    """Format an integer minute count as a zero-padded ``HH:MM`` clock time.

    The value wraps around the 24-hour day, so 1440 renders as ``00:00``
    and -1 renders as ``23:59``.
    """
    hours, mins = divmod(minutes % 1440, 60)
    return f"{hours:02d}:{mins:02d}"
33
+
34
+
35
def angle_to_minutes(sin_v, cos_v):
    """Convert a time of day encoded as (sin, cos) back to minutes.

    The pair is read as the sine and cosine of an angle in which one full
    turn corresponds to 1440 minutes (24 hours).

    Args:
        sin_v: Sine component of the encoded time.
        cos_v: Cosine component of the encoded time.

    Returns:
        Minutes since midnight as an ``int`` in ``0..1439``. A zero vector
        maps to 0 because ``math.atan2(0, 0)`` is 0.
    """
    ang = math.atan2(sin_v, cos_v)
    if ang < 0:
        ang += 2 * math.pi
    # An angle just short of a full turn rounds up to 1440; wrap it to 0 so
    # the result is always a valid minute of the day.
    return int(round(ang * 1440 / (2 * math.pi))) % 1440
40
+
41
+
42
def decode_fp(s):
    """Decode a ``parcae:v1:<payload>`` fingerprint string into a vector.

    The payload is URL-safe base64 of 16-bit signed integers; each integer
    is divided by 4096 to recover the original value.

    Args:
        s: Fingerprint string of the form ``parcae:v1:<payload>``.

    Returns:
        ``np.ndarray`` of dtype ``float32`` with one element per encoded
        integer.

    Raises:
        ValueError: If the string is not a ``parcae:v1`` fingerprint, the
            payload is not valid base64, or the payload does not contain a
            whole number of 16-bit values.
    """
    parts = s.split(":", 2)
    if len(parts) != 3 or parts[0] != "parcae" or parts[1] != "v1":
        raise ValueError(f"not a parcae v1 fingerprint: {s!r}")

    try:
        raw = base64.urlsafe_b64decode(parts[2])
    except ValueError as err:  # binascii.Error is a ValueError subclass
        raise ValueError(f"fingerprint payload is not valid base64: {s!r}") from err

    if len(raw) % 2:
        raise ValueError(f"fingerprint payload has an odd byte count: {s!r}")

    # NOTE(review): np.int16 uses the machine's native byte order, so a
    # fingerprint is only portable between hosts of the same endianness.
    q = np.frombuffer(raw, dtype=np.int16)
    return q.astype(np.float32) / 4096.0
47
+
48
+
49
def cosine(a, b):
    """Return the cosine similarity of two equal-length vectors.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers, same length as ``a``.

    Returns:
        The similarity as a Python ``float``. If either vector has zero
        norm the similarity is undefined and 0.0 is returned instead of NaN.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denom = float(np.linalg.norm(a) * np.linalg.norm(b))
    if denom == 0.0:
        # Avoid 0/0: an all-zero vector has no direction to compare.
        return 0.0
    return float(np.dot(a, b) / denom)
51
+
52
+
53
def sparkline(x):
    """Render a sequence of numbers as a one-line Unicode bar chart.

    Values are scaled linearly so that the smallest finite value maps to the
    lowest of eight block characters and the largest to the tallest.

    Args:
        x: 1-D array-like of numbers.

    Returns:
        A string with one block character per input value. Empty input
        gives an empty string; constant input gives all-lowest bars;
        non-finite values (NaN, +/-inf) are drawn as the lowest bar.
    """
    ticks = "▁▂▃▄▅▆▇█"
    x = np.asarray(x, dtype=float)

    if x.size == 0:
        return ""

    finite = np.isfinite(x)
    if not finite.any():
        return ticks[0] * len(x)

    lo = x[finite].min()
    hi = x[finite].max()

    if hi == lo:
        return ticks[0] * len(x)

    # Replace non-finite entries with the minimum so they land on the lowest
    # tick instead of producing an undefined integer index.
    cleaned = np.where(finite, x, lo)
    scaled = (cleaned - lo) / (hi - lo) * (len(ticks) - 1)
    idx = np.round(scaled).astype(int)

    return "".join(ticks[i] for i in idx)
67
+
68
+
69
def hour_axis(n=24, marks=(0, 6, 12, 18, 24)):
    """Build an ``n``-character ruler with ``|`` at each mark position.

    Marks at or beyond ``n`` are left out; every other column is a space.
    """
    cells = list(" " * n)
    for pos in (m for m in marks if m < n):
        cells[pos] = "|"
    return "".join(cells)
75
+
76
+
77
def hour_labels(n=24, marks=(0, 6, 12, 18, 24)):
    """Build an ``n``-character row of two-digit labels under the marks.

    Each mark below ``n`` is written as a zero-padded number starting at its
    own column; characters that would fall past column ``n - 1`` are dropped.
    """
    cells = list(" " * n)
    for mark in marks:
        label = f"{mark:02d}"
        if mark >= n:
            continue
        for offset, ch in enumerate(label):
            col = mark + offset
            if col < n:
                cells[col] = ch
    return "".join(cells)
86
+
87
+
88
def main(argv=None):
    """Run the ``parcae`` command-line interface.

    Two sub-commands are available:

    * ``analyze CSV`` -- infer the timezone, typical sleep schedule, 24-hour
      activity profile and a fingerprint from the file's ``timestamp``
      column.
    * ``compare FP1 FP2`` -- print the cosine similarity of two fingerprints.

    The older spelling ``parcae CSV`` is still accepted and is treated as
    ``parcae analyze CSV``. Running without a sub-command is a usage error.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``.
    """
    import sys  # function-scope import; the module's import block is untouched

    commands = ("analyze", "compare")
    argv = list(sys.argv[1:] if argv is None else argv)
    # Route a bare leading positional (the pre-sub-command usage) to analyze.
    if argv and not argv[0].startswith("-") and argv[0] not in commands:
        argv.insert(0, "analyze")

    parser = argparse.ArgumentParser(prog="parcae")
    # required=True makes a missing sub-command a usage error instead of
    # letting the analyze path fail on a non-existent ``args.csv``.
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_analyze = sub.add_parser("analyze")
    p_analyze.add_argument("csv", help="CSV file with a 'timestamp' column")

    p_cmp = sub.add_parser("compare")
    p_cmp.add_argument("fp1")
    p_cmp.add_argument("fp2")

    parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.2.0")

    args = parser.parse_args(argv)

    print("+ Parcae analysis\n")

    if args.cmd == "compare":
        v1 = decode_fp(args.fp1)
        v2 = decode_fp(args.fp2)
        if v1.shape != v2.shape:
            # A dot product of unequal-length vectors would raise deep inside
            # numpy; report it as a normal command-line error instead.
            parser.error("fingerprints have different lengths and cannot be compared")
        sim = cosine(v1, v2)

        print("+ fingerprint comparison:")
        print(f"\tcosine similarity: {sim:.4f}")

        if sim > 0.95:
            print("\tmatch: very likely same user")
        elif sim > 0.90:
            print("\tmatch: probable")
        else:
            print("\tmatch: unlikely")

        return

    timestamps = parse_csv(args.csv)

    p = Parcae()
    result = p.analyze(timestamps)

    tz = result["timezone_offset_hours"]
    days = result["days"]

    print(f"~ inferred timezone: UTC{tz:+d}\n")

    sleep_phase = result["sleep_phase"]
    sleep_stats = result["sleep_stats"]
    profile_24h = result["profile_24h"]

    # Fingerprint layout: activity profile, then sleep phase, then sleep
    # stats, stored as 16-bit integers with a scale factor of 4096.
    vec = np.concatenate([profile_24h, sleep_phase, sleep_stats]).astype(np.float32)
    # Clip before the cast so out-of-range values saturate rather than wrap.
    limits = np.iinfo(np.int16)
    q = np.clip(np.round(vec * 4096), limits.min, limits.max).astype(np.int16)
    fp = base64.urlsafe_b64encode(q.tobytes()).decode()

    if result["sleep_blocks"]:
        mean_start = angle_to_minutes(sleep_phase[0], sleep_phase[1])
        mean_end = angle_to_minutes(sleep_phase[2], sleep_phase[3])

        # sleep_stats holds (mean, std, median) as fractions of a day.
        std_dur = int(round(sleep_stats[1] * 1440))
        med_dur = int(round(sleep_stats[2] * 1440))

        print("+ typical schedule:")
        print(
            f"\t- sleep: {format_hm(mean_start)} -> {format_hm(mean_end)} (≈ {med_dur // 60}h {med_dur % 60:02d}m)"
        )
        print(f"\t- awake: {format_hm(mean_end)} -> {format_hm(mean_start)}")
        print(f"\t- variability: ±{std_dur}m\n")
    else:
        # Without sleep blocks the phase and stats are all zeros; printing
        # them would show a bogus "00:00 -> 00:00" schedule.
        print("! no sleep blocks detected\n")

    print("+ activity profile (24h):")
    print(f"\t{sparkline(profile_24h)}")
    print(f"\t{hour_axis(len(profile_24h))}")
    print(f"\t{hour_labels(len(profile_24h))}\n")

    print("+ fingerprint:")
    print(f"\tparcae:v1:{fp}\n")

    print(f"~ based on {days} days of data")
    print(f"~ bin size: {p.bin_minutes} minutes")
167
+
168
+
169
+ if __name__ == "__main__":
170
+ main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parcae
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Infer daily rhythm and sleep schedule from message timestamps
5
5
  Project-URL: Homepage, https://github.com/jeremyctrl/parcae
6
6
  Project-URL: Repository, https://github.com/jeremyctrl/parcae
@@ -102,8 +102,17 @@ parcae user_timestamps.csv
102
102
  ~ inferred timezone: UTC+3
103
103
 
104
104
  + typical schedule:
105
- - sleep: 02:46 -> 11:38 (≈ 8h 45m)
106
- - awake: 11:38 -> 02:46
105
+ - sleep: 23:52 -> 06:34 (≈ 8h 30m)
106
+ - awake: 06:34 -> 23:52
107
+ - variability: ±175m
108
+
109
+ + activity profile (24h):
110
+ ▁▁▁▁▁▁▁▁▅▇▅█▆▁▅▄▅▆▁▇▇▆▆▇
111
+ | | | |
112
+ 00 06 12 18
113
+
114
+ + fingerprint:
115
+ parcae:v1:AAAAAAAAAAAAAAAAAAAAAD0AWQA6AGMAQQAAADoAMAA6AEcAAABWAFUATgBMAFsAd__-D9QPqP12BPEBqwU=
107
116
 
108
117
  ~ based on 30 days of data
109
118
  ~ bin size: 15 minutes
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parcae"
3
- version = "0.1.1"
3
+ version = "0.2.0"
4
4
  description = "Infer daily rhythm and sleep schedule from message timestamps"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
@@ -1,114 +0,0 @@
1
- import argparse
2
- import csv
3
- import math
4
- from collections import defaultdict
5
- from datetime import datetime, timedelta
6
-
7
- from parcae import Parcae
8
-
9
-
10
- def parse_csv(path):
11
- timestamps = []
12
- with open(path, "r", encoding="utf-8") as f:
13
- reader = csv.DictReader(f)
14
- fieldnames = reader.fieldnames
15
- if fieldnames is None or "timestamp" not in fieldnames:
16
- raise ValueError("! CSV must have a 'timestamp' column")
17
-
18
- for row in reader:
19
- timestamps.append(row["timestamp"])
20
-
21
- return timestamps
22
-
23
-
24
- def minutes_since_midnight(dt):
25
- return dt.hour * 60 + dt.minute
26
-
27
-
28
- def format_hm(minutes):
29
- h = (minutes // 60) % 24
30
- m = minutes % 60
31
- return f"{h:02d}:{m:02d}"
32
-
33
-
34
- def circular_mean_minutes(values):
35
- angles = [2 * math.pi * v / 1440.0 for v in values]
36
-
37
- x = sum(math.cos(a) for a in angles)
38
- y = sum(math.sin(a) for a in angles)
39
-
40
- if x == 0 and y == 0:
41
- return int(values[0])
42
-
43
- mean_angle = math.atan2(y, x)
44
- if mean_angle < 0:
45
- mean_angle += 2 * math.pi
46
-
47
- mean_minutes = int(round(mean_angle * 1440.0 / (2 * math.pi)))
48
- return mean_minutes % 1440
49
-
50
-
51
- def main():
52
- parser = argparse.ArgumentParser(prog="parcae")
53
- parser.add_argument("csv", help="CSV file with a 'timestamp' column")
54
- parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.1.1")
55
- args = parser.parse_args()
56
-
57
- print("+ Parcae analysis\n")
58
-
59
- timestamps = parse_csv(args.csv)
60
-
61
- p = Parcae()
62
- result = p.analyze(timestamps)
63
-
64
- tz = result["timezone_offset_hours"]
65
- sleep_blocks = result["sleep_blocks"]
66
-
67
- print(f"~ inferred timezone: UTC{tz:+d}\n")
68
-
69
- offset = timedelta(hours=tz)
70
-
71
- local_blocks = []
72
- for b in sleep_blocks:
73
- start = datetime.fromisoformat(b["start"]) + offset
74
- end = datetime.fromisoformat(b["end"]) + offset
75
- local_blocks.append((start, end))
76
-
77
- by_day = defaultdict(list)
78
-
79
- for start, end in local_blocks:
80
- day = start.date()
81
- dur = (end - start).total_seconds()
82
- by_day[day].append((dur, start, end))
83
-
84
- main_sleeps = []
85
- for day, blocks in by_day.items():
86
- blocks.sort(reverse=True)
87
- _, start, end = blocks[0]
88
- main_sleeps.append((start, end))
89
-
90
- if not main_sleeps:
91
- print("! no sleep blocks detected")
92
- return
93
-
94
- sleep_starts = [minutes_since_midnight(s) for s, e in main_sleeps]
95
- sleep_ends = [minutes_since_midnight(e) for s, e in main_sleeps]
96
- durations = [int((e - s).total_seconds() / 60) for s, e in main_sleeps]
97
-
98
- mean_start = circular_mean_minutes(sleep_starts)
99
- mean_end = circular_mean_minutes(sleep_ends)
100
- durations.sort()
101
- med_dur = durations[len(durations) // 2]
102
-
103
- print("+ typical schedule:")
104
- print(
105
- f"\t- sleep: {format_hm(mean_start)} -> {format_hm(mean_end)} (≈ {med_dur // 60}h {med_dur % 60:02d}m)"
106
- )
107
- print(f"\t- awake: {format_hm(mean_end)} -> {format_hm(mean_start)}\n")
108
-
109
- print(f"~ based on {len(main_sleeps)} days of data")
110
- print(f"~ bin size: {p.bin_minutes} minutes")
111
-
112
-
113
- if __name__ == "__main__":
114
- main()
File without changes
File without changes
File without changes
File without changes