civic-transparency-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ci/transparency/sdk/__init__.py +23 -0
- ci/transparency/sdk/_version.py +34 -0
- ci/transparency/sdk/cli/main.py +158 -0
- ci/transparency/sdk/digests.py +26 -0
- ci/transparency/sdk/hash_core.py +15 -0
- ci/transparency/sdk/ids.py +39 -0
- ci/transparency/sdk/io_schema.py +128 -0
- ci/transparency/sdk/window_agg.py +35 -0
- civic_transparency_sdk-0.1.0.dist-info/METADATA +157 -0
- civic_transparency_sdk-0.1.0.dist-info/RECORD +14 -0
- civic_transparency_sdk-0.1.0.dist-info/WHEEL +5 -0
- civic_transparency_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- civic_transparency_sdk-0.1.0.dist-info/licenses/LICENSE +21 -0
- civic_transparency_sdk-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
# ci/transparency/sdk/__init__.py
|
2
|
+
|
3
|
+
from .digests import Digests, MinHashSig, SimHash64
|
4
|
+
from .hash_core import ContentHash
|
5
|
+
from .ids import HashId, TopicId, WorldId
|
6
|
+
from .io_schema import dumps, loads, windowagg_from_json, windowagg_to_json
|
7
|
+
from .window_agg import TopHash, WindowAgg
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"ContentHash",
|
11
|
+
"TopHash",
|
12
|
+
"WindowAgg",
|
13
|
+
"Digests",
|
14
|
+
"SimHash64",
|
15
|
+
"MinHashSig",
|
16
|
+
"windowagg_to_json",
|
17
|
+
"windowagg_from_json",
|
18
|
+
"dumps",
|
19
|
+
"loads",
|
20
|
+
"HashId",
|
21
|
+
"WorldId",
|
22
|
+
"TopicId",
|
23
|
+
]
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# file generated by setuptools-scm
|
2
|
+
# don't change, don't track in version control
|
3
|
+
|
4
|
+
__all__ = [
|
5
|
+
"__version__",
|
6
|
+
"__version_tuple__",
|
7
|
+
"version",
|
8
|
+
"version_tuple",
|
9
|
+
"__commit_id__",
|
10
|
+
"commit_id",
|
11
|
+
]
|
12
|
+
|
13
|
+
TYPE_CHECKING = False
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from typing import Tuple
|
16
|
+
from typing import Union
|
17
|
+
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
19
|
+
COMMIT_ID = Union[str, None]
|
20
|
+
else:
|
21
|
+
VERSION_TUPLE = object
|
22
|
+
COMMIT_ID = object
|
23
|
+
|
24
|
+
version: str
|
25
|
+
__version__: str
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
27
|
+
version_tuple: VERSION_TUPLE
|
28
|
+
commit_id: COMMIT_ID
|
29
|
+
__commit_id__: COMMIT_ID
|
30
|
+
|
31
|
+
__version__ = version = '0.1.0'
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 0)
|
33
|
+
|
34
|
+
__commit_id__ = commit_id = None
|
@@ -0,0 +1,158 @@
|
|
1
|
+
"""
|
2
|
+
CLI for civic transparency simulation core.
|
3
|
+
|
4
|
+
Minimal interface for basic data generation and conversion utilities.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import argparse
|
8
|
+
import sys
|
9
|
+
from pathlib import Path
|
10
|
+
from typing import List, Optional
|
11
|
+
|
12
|
+
|
13
|
+
def cmd_generate_baseline(args: argparse.Namespace) -> None:
    """Generate a synthetic world by delegating to a generator module.

    The influenced generator (``scripts_py.gen_world_b_light``) is used when
    any of ``dup_mult``, ``burst_minutes`` or ``reply_nudge`` is set on
    *args*; otherwise the baseline generator (``scripts_py.gen_empty_world``)
    is used. The generator is run as a child process with the current
    interpreter, and its captured stdout is printed on success.

    Args:
        args: Parsed CLI namespace. ``topic_id``, ``windows``,
            ``step_minutes``, ``out`` and ``seed`` are always read;
            ``world`` is read only on the baseline path; the three
            influence options may be absent or ``None``.

    Raises:
        SystemExit: With status 1, after printing the child's stderr, when
            the generator exits with a non-zero return code.
    """
    import subprocess

    # A value of None means "option not supplied". Explicit zeros such as
    # ``--dup-mult 0`` are real values and must be forwarded, so presence is
    # tested with ``is not None`` both for choosing the generator and for
    # building its command line.
    influence_options = [
        ("--dup-mult", getattr(args, "dup_mult", None)),
        ("--burst-minutes", getattr(args, "burst_minutes", None)),
        ("--reply-nudge", getattr(args, "reply_nudge", None)),
    ]
    supplied = [
        (flag, value) for flag, value in influence_options if value is not None
    ]

    # Options shared by both generators, in the order the scripts receive them.
    common = [
        "--topic-id",
        args.topic_id,
        "--windows",
        str(args.windows),
        "--step-minutes",
        str(args.step_minutes),
        "--out",
        args.out,
        "--seed",
        str(args.seed),
    ]

    if supplied:
        # Influenced world: no --world option is passed to this generator.
        cmd = [sys.executable, "-m", "scripts_py.gen_world_b_light", *common]
        for flag, value in supplied:
            cmd.extend([flag, str(value)])
    else:
        # Baseline world.
        cmd = [
            sys.executable,
            "-m",
            "scripts_py.gen_empty_world",
            "--world",
            args.world,
            *common,
        ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"Error: {result.stderr}", file=sys.stderr)
        sys.exit(1)
    print(result.stdout)
|
74
|
+
|
75
|
+
|
76
|
+
def cmd_convert(args: argparse.Namespace) -> None:
    """Convert a JSONL file to DuckDB by running ``scripts_py.jsonl_to_duckdb``.

    The converter runs as a child process with the current interpreter and
    receives ``args.jsonl``, ``args.duck`` and ``args.schema``. Its captured
    stdout is printed on success.

    Raises:
        SystemExit: With status 1, after printing the child's stderr, when
            the converter exits with a non-zero return code.
    """
    import subprocess

    command = [sys.executable, "-m", "scripts_py.jsonl_to_duckdb"]
    command += ["--jsonl", args.jsonl]
    command += ["--duck", args.duck]
    command += ["--schema", args.schema]

    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode == 0:
        print(completed.stdout)
        return

    print(f"Error: {completed.stderr}", file=sys.stderr)
    sys.exit(1)
|
97
|
+
|
98
|
+
|
99
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the ``ct-sdk`` argument parser.

    Two sub-commands are registered: ``generate`` (dispatching to
    ``cmd_generate_baseline``) and ``convert`` (dispatching to
    ``cmd_convert``). The chosen sub-command name is stored in ``command``
    and its handler in ``func``.
    """
    root = argparse.ArgumentParser(
        prog="ct-sdk",
        description="Civic Transparency Simulation Core - Basic data generation utilities",
    )
    commands = root.add_subparsers(dest="command", help="Available commands")

    # generate: baseline or influenced world, depending on the optional
    # influence parameters at the end of the table.
    generate = commands.add_parser("generate", help="Generate synthetic world")
    generate_options = (
        ("--world", {"required": True, "help": "World identifier"}),
        ("--topic-id", {"required": True, "help": "Topic identifier"}),
        ("--windows", {"type": int, "default": 12, "help": "Number of time windows"}),
        ("--step-minutes", {"type": int, "default": 10, "help": "Minutes per window"}),
        ("--out", {"required": True, "help": "Output JSONL file"}),
        ("--seed", {"type": int, "default": 4242, "help": "Random seed"}),
        ("--dup-mult", {"type": float, "help": "Duplicate multiplier for influence"}),
        ("--burst-minutes", {"type": int, "help": "Micro-burst duration"}),
        ("--reply-nudge", {"type": float, "help": "Reply proportion adjustment"}),
    )
    for flag, options in generate_options:
        generate.add_argument(flag, **options)
    generate.set_defaults(func=cmd_generate_baseline)

    # convert: JSONL -> DuckDB
    convert = commands.add_parser("convert", help="Convert JSONL to DuckDB")
    convert_options = (
        ("--jsonl", "Input JSONL file"),
        ("--duck", "Output DuckDB file"),
        ("--schema", "Schema SQL file"),
    )
    for flag, help_text in convert_options:
        convert.add_argument(flag, required=True, help=help_text)
    convert.set_defaults(func=cmd_convert)

    return root
|
138
|
+
|
139
|
+
|
140
|
+
def main(args: Optional[List[str]] = None) -> int:
    """Run the CLI and return a process exit status.

    Args:
        args: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 when the selected command handler completes; 1 when no command
        was given (help is printed) or the handler raised an ``Exception``
        (the message is printed to stderr).
    """
    cli = create_parser()
    namespace = cli.parse_args(args)

    if namespace.command:
        try:
            namespace.func(namespace)
        except Exception as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return 1
        return 0

    # No sub-command supplied: show usage and signal failure.
    cli.print_help()
    return 1
|
155
|
+
|
156
|
+
|
157
|
+
if __name__ == "__main__":
|
158
|
+
sys.exit(main())
|
@@ -0,0 +1,26 @@
|
|
1
|
+
"""
|
2
|
+
sdk/digests.py
|
3
|
+
Optional similarity digests for content hashes.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Optional, Tuple
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass(frozen=True)
class SimHash64:
    """Immutable wrapper around a SimHash fingerprint value."""

    # Held as a plain Python int; the intended range is 0..2^64-1
    # (the range is not checked here).
    bits: int
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass(frozen=True)
class MinHashSig:
    """Immutable MinHash signature."""

    # NOTE(review): presumably the number of hash functions / signature
    # length; it is not checked against len(sig) here.
    k: int
    # Signature values, stored as an immutable tuple.
    sig: Tuple[int, ...]
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass(frozen=True)
class Digests:
    """Container for optional similarity digests.

    Both fields default to ``None``, so an instance may carry either
    digest, both, or neither.
    """

    # SimHash fingerprint, when available.
    simhash64: Optional[SimHash64] = None
    # MinHash signature, when available.
    minhash: Optional[MinHashSig] = None
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
sdk/hash_core.py
|
3
|
+
Core data structures for content hashes.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
|
8
|
+
from .ids import HashId
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass(frozen=True)
class ContentHash:
    """Opaque identifier for a latent message family; carries no text."""

    # The algorithm-qualified hash identifier.
    id: HashId
|
@@ -0,0 +1,39 @@
|
|
1
|
+
"""
|
2
|
+
sdk/ids.py
|
3
|
+
Identifiers for events, content hashes, estimated topics, and worlds.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
|
8
|
+
|
9
|
+
@dataclass(frozen=True)
class EventId:
    """Immutable identifier for an event, wrapping a single string."""

    value: str
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass(frozen=True)
class HashId:
    """Immutable content-hash identifier rendered as ``algo:value``."""

    # Algorithm label, e.g. "opaque", "sha256", "blake3", "simhash64".
    algo: str
    # Canonicalized string for that algorithm.
    value: str

    def __str__(self) -> str:
        """Return the identifier as ``algo:value``."""
        return "{}:{}".format(self.algo, self.value)
|
21
|
+
|
22
|
+
|
23
|
+
@dataclass(frozen=True)
class TopicId:
    """
    Deterministic cluster identifier derived from content
    identifiers/fingerprints.

    Rendered as 'algo:value', following the same canonicalization
    discipline as HashId.
    """

    # Algorithm label, e.g. "simhash64-lsh", "minhash-lsh", "sha256",
    # "opaque-topic", "x-<vendor>".
    algo: str
    # Canonical cluster key for that algorithm (hex or base64url per the
    # algorithm's spec).
    value: str

    def __str__(self) -> str:
        """Return the identifier as ``algo:value``."""
        return "{}:{}".format(self.algo, self.value)
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass(frozen=True)
class WorldId:
    """Immutable identifier for a world, wrapping a single string."""

    value: str
|
@@ -0,0 +1,128 @@
|
|
1
|
+
"""
|
2
|
+
sdk/io_schema.py
|
3
|
+
JSON serialization for WindowAgg and related types.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from datetime import datetime
|
7
|
+
from typing import Any, Dict, List, Optional, TypedDict
|
8
|
+
|
9
|
+
import orjson
|
10
|
+
|
11
|
+
from .digests import Digests, MinHashSig, SimHash64
|
12
|
+
from .hash_core import ContentHash
|
13
|
+
from .ids import HashId
|
14
|
+
from .window_agg import TopHash, WindowAgg
|
15
|
+
|
16
|
+
|
17
|
+
def _dt_to_iso(dt: datetime) -> str:
|
18
|
+
return dt.replace(microsecond=0).isoformat() + "Z"
|
19
|
+
|
20
|
+
|
21
|
+
def _iso_to_dt(s: str) -> datetime:
|
22
|
+
return datetime.fromisoformat(s.rstrip("Z"))
|
23
|
+
|
24
|
+
|
25
|
+
class TopHashJson(TypedDict):
    """JSON shape of one top-hash entry."""

    hash: str  # the hash identifier rendered as a string ("algo:value")
    count: int


class SimHash64Json(TypedDict):
    """JSON shape of a SimHash digest."""

    bits: str  # hexadecimal string as produced by hex()


class MinHashJson(TypedDict):
    """JSON shape of a MinHash signature."""

    k: int
    sig: List[str]  # each element is a hexadecimal string as produced by hex()


class DigestsJson(TypedDict, total=False):
    """JSON shape of the optional digests object; either key may be absent."""

    simhash64: SimHash64Json
    minhash: MinHashJson


class WindowAggJson(TypedDict, total=False):
    """JSON shape of a serialized WindowAgg record.

    Declared with ``total=False`` so that ``digests`` can be omitted.
    """

    world_id: str
    topic_id: str
    window_start: str  # ISO-8601 text
    window_end: str  # ISO-8601 text
    n_messages: int
    n_unique_hashes: int
    dup_rate: float
    top_hashes: List[TopHashJson]
    hash_concentration: float
    burst_score: float
    # Float values, matching WindowAgg.type_mix (Mapping[str, float]);
    # this was previously annotated as Dict[str, int].
    type_mix: Dict[str, float]
    time_histogram: List[int]
    digests: DigestsJson
|
58
|
+
|
59
|
+
|
60
|
+
def windowagg_to_json(agg: WindowAgg) -> WindowAggJson:
    """Convert a ``WindowAgg`` into a JSON-ready dictionary.

    Timestamps are rendered through ``_dt_to_iso``, each top hash becomes
    ``{"hash": str(id), "count": n}``, and ``type_mix`` /
    ``time_histogram`` are copied into a new dict / list. A ``digests``
    key is added only when ``agg.digests`` is truthy; digest integers are
    written as ``hex()`` strings.
    """
    top_hashes = []
    for entry in agg.top_hashes:
        top_hashes.append({"hash": str(entry.hash.id), "count": entry.count})

    payload: WindowAggJson = {
        "world_id": agg.world_id,
        "topic_id": agg.topic_id,
        "window_start": _dt_to_iso(agg.window_start),
        "window_end": _dt_to_iso(agg.window_end),
        "n_messages": agg.n_messages,
        "n_unique_hashes": agg.n_unique_hashes,
        "dup_rate": agg.dup_rate,
        "top_hashes": top_hashes,
        "hash_concentration": agg.hash_concentration,
        "burst_score": agg.burst_score,
        "type_mix": dict(agg.type_mix),
        "time_histogram": list(agg.time_histogram),
    }

    digests = agg.digests
    if not digests:
        return payload

    encoded: DigestsJson = {}
    if digests.simhash64:
        encoded["simhash64"] = {"bits": hex(digests.simhash64.bits)}
    if digests.minhash:
        encoded["minhash"] = {
            "k": digests.minhash.k,
            "sig": list(map(hex, digests.minhash.sig)),
        }
    payload["digests"] = encoded
    return payload
|
88
|
+
|
89
|
+
|
90
|
+
def windowagg_from_json(d: Dict[str, Any]) -> WindowAgg:
    """Rebuild a ``WindowAgg`` from its JSON-style dictionary.

    Args:
        d: Mapping in the shape produced by ``windowagg_to_json``.
            ``top_hashes`` and ``digests`` are optional; a ``digests``,
            ``simhash64`` or ``minhash`` value of ``None`` (JSON ``null``)
            is treated the same as the key being absent.

    Returns:
        The reconstructed ``WindowAgg``. ``type_mix`` and
        ``time_histogram`` are copied, so the frozen record does not alias
        the caller's mutable containers (mirroring the copies made by
        ``windowagg_to_json``).

    Raises:
        KeyError: If a required key is missing.
        ValueError: If a top-hash string is not in ``algo:value`` form, or
            a numeric field cannot be converted.
    """
    top: List[TopHash] = []
    for th in d.get("top_hashes", []):
        # Split on the first ':' only; the value part may contain more.
        algo, sep, value = th["hash"].partition(":")
        if not sep:
            raise ValueError(
                f"top hash {th['hash']!r} is not in 'algo:value' form"
            )
        top.append(TopHash(ContentHash(HashId(algo, value)), int(th["count"])))

    dig = None
    dj = d.get("digests")
    if dj is not None:
        sh_json = dj.get("simhash64")
        sh = SimHash64(int(sh_json["bits"], 16)) if sh_json is not None else None

        mh = None
        mh_json = dj.get("minhash")
        if mh_json is not None:
            mh = MinHashSig(
                k=int(mh_json["k"]),
                sig=tuple(int(x, 16) for x in mh_json.get("sig", [])),
            )
        dig = Digests(simhash64=sh, minhash=mh)

    return WindowAgg(
        world_id=d["world_id"],
        topic_id=d["topic_id"],
        window_start=_iso_to_dt(d["window_start"]),
        window_end=_iso_to_dt(d["window_end"]),
        n_messages=int(d["n_messages"]),
        n_unique_hashes=int(d["n_unique_hashes"]),
        dup_rate=float(d["dup_rate"]),
        top_hashes=top,
        hash_concentration=float(d["hash_concentration"]),
        burst_score=float(d["burst_score"]),
        type_mix=dict(d["type_mix"]),
        time_histogram=list(d["time_histogram"]),
        digests=dig,
    )
|
121
|
+
|
122
|
+
|
123
|
+
def dumps(obj: Dict[str, Any]) -> bytes:
    """Serialize *obj* to JSON bytes with ``orjson.dumps``."""
    encoded = orjson.dumps(obj)
    return encoded
|
125
|
+
|
126
|
+
|
127
|
+
def loads(b: bytes) -> Dict[str, Any]:
    """Parse JSON bytes with ``orjson.loads`` and return the result."""
    decoded = orjson.loads(b)
    return decoded
|
@@ -0,0 +1,35 @@
|
|
1
|
+
"""
|
2
|
+
sdk/window_agg.py
|
3
|
+
Core data structures for windowed aggregation of content hashes.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from datetime import datetime
|
8
|
+
from typing import Mapping, Optional, Sequence
|
9
|
+
|
10
|
+
from .digests import Digests
|
11
|
+
from .hash_core import ContentHash
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass(frozen=True)
class TopHash:
    """Immutable pairing of a content hash with a count."""

    hash: ContentHash
    # NOTE(review): presumably the number of messages carrying this hash
    # within the window - confirm against the generator.
    count: int
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass(frozen=True)
class WindowAgg:
    """Immutable aggregate record for one time window.

    This is the minimal API-shaped record that students/consumers see.
    Only ``digests`` has a default; every other field is required.
    """

    world_id: str
    topic_id: str
    window_start: datetime
    window_end: datetime
    n_messages: int
    n_unique_hashes: int
    dup_rate: float
    top_hashes: Sequence[TopHash]
    hash_concentration: float
    burst_score: float
    # Example: {"post": .5, "reply": .3, "retweet": .2}
    type_mix: Mapping[str, float]
    time_histogram: Sequence[int]
    # Optional similarity digests; None when not provided.
    digests: Optional[Digests] = None
|
@@ -0,0 +1,157 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: civic-transparency-sdk
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Civic Transparency SDK - Development kit for transparency applications
|
5
|
+
Author: Civic Interconnect
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Documentation, https://civic-interconnect.github.io/civic-transparency-sdk/latest/
|
8
|
+
Project-URL: Homepage, https://github.com/civic-interconnect/civic-transparency-sdk
|
9
|
+
Project-URL: Repository, https://github.com/civic-interconnect/civic-transparency-sdk
|
10
|
+
Keywords: transparency,api,sdk,civic
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
14
|
+
Classifier: Intended Audience :: Education
|
15
|
+
Classifier: Operating System :: OS Independent
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
22
|
+
Requires-Python: >=3.11
|
23
|
+
Description-Content-Type: text/markdown
|
24
|
+
License-File: LICENSE
|
25
|
+
Requires-Dist: duckdb
|
26
|
+
Requires-Dist: civic-transparency-types>=0.2.1
|
27
|
+
Requires-Dist: matplotlib
|
28
|
+
Requires-Dist: numpy
|
29
|
+
Requires-Dist: orjson
|
30
|
+
Requires-Dist: pandas
|
31
|
+
Requires-Dist: pydantic>=2.0.1
|
32
|
+
Requires-Dist: pyyaml
|
33
|
+
Provides-Extra: dev
|
34
|
+
Requires-Dist: build; extra == "dev"
|
35
|
+
Requires-Dist: jsonschema; extra == "dev"
|
36
|
+
Requires-Dist: mkdocs; extra == "dev"
|
37
|
+
Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == "dev"
|
38
|
+
Requires-Dist: mkdocs-include-markdown-plugin; extra == "dev"
|
39
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
40
|
+
Requires-Dist: mkdocs-static-i18n; extra == "dev"
|
41
|
+
Requires-Dist: mkdocstrings[python]; extra == "dev"
|
42
|
+
Requires-Dist: mike; extra == "dev"
|
43
|
+
Requires-Dist: pre-commit; extra == "dev"
|
44
|
+
Requires-Dist: pytest; extra == "dev"
|
45
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
46
|
+
Requires-Dist: ruff; extra == "dev"
|
47
|
+
Provides-Extra: docs-minify
|
48
|
+
Requires-Dist: mkdocs-minify-plugin; extra == "docs-minify"
|
49
|
+
Dynamic: license-file
|
50
|
+
|
51
|
+
# Civic Transparency Software Development Kit (SDK)
|
52
|
+
|
53
|
+
[Documentation](https://civic-interconnect.github.io/civic-transparency-sdk/)
|
54
|
+
[PyPI](https://pypi.org/project/civic-transparency-sdk/)
|
55
|
+
[Python versions](https://pypi.org/project/civic-transparency-sdk/)
|
56
|
+
[CI status](https://github.com/civic-interconnect/civic-transparency-sdk/actions/workflows/ci.yml)
|
57
|
+
[License: MIT](./LICENSE)
|
58
|
+
|
59
|
+
A software development kit for civic transparency applications.
|
60
|
+
|
61
|
+
## Installation
|
62
|
+
|
63
|
+
```pwsh
|
64
|
+
pip install civic-transparency-sdk
|
65
|
+
```
|
66
|
+
|
67
|
+
## What This Package Provides
|
68
|
+
|
69
|
+
**Core Data Types**: Standardized structures for transparency APIs including WindowAgg aggregations, ContentHash identifiers, and content fingerprinting that enable consistent data exchange.
|
70
|
+
|
71
|
+
**Implementation Support**: Essential utilities for platforms implementing transparency APIs, including JSON serialization, database schemas, and validation helpers.
|
72
|
+
|
73
|
+
**Research Tools**: Synthetic data generation capabilities for testing transparency implementations and conducting research without exposing real user data.
|
74
|
+
|
75
|
+
**Database Integration**: Ready-to-use schemas and conversion utilities for storing transparency data in SQL databases with proper indexing and query patterns.
|
76
|
+
|
77
|
+
## Quick Start
|
78
|
+
|
79
|
+
Generate synthetic data:
|
80
|
+
|
81
|
+
```pwsh
|
82
|
+
ct-sdk generate --world A --topic-id aa55ee77 --out world_A.jsonl
|
83
|
+
|
84
|
+
ct-sdk convert --jsonl world_A.jsonl --duck world_A.duckdb --schema schema/schema.sql
|
85
|
+
```
|
86
|
+
|
87
|
+
## Use Generated Data
|
88
|
+
|
89
|
+
The generated DuckDB files are ready for analysis with any SQL-compatible tools or custom analysis scripts.
|
90
|
+
|
91
|
+
## Reproducibility
|
92
|
+
|
93
|
+
All generation is deterministic and includes:
|
94
|
+
- **Seed-based randomization**: Reproduce exact datasets
|
95
|
+
- **Version tracking**: Metadata includes package versions
|
96
|
+
- **Parameter logging**: All generation settings preserved
|
97
|
+
- **Schema versioning**: Database structures documented
|
98
|
+
|
99
|
+
**Example Seeds:**
|
100
|
+
- World A (baseline): `4242`
|
101
|
+
|
102
|
+
## Use Cases
|
103
|
+
|
104
|
+
- **Platform Implementation**: Implement transparency APIs with standardized data structures and serialization utilities.
|
105
|
+
|
106
|
+
- **Academic Research**: Generate controlled datasets for studying information dynamics with known parameters.
|
107
|
+
|
108
|
+
- **Education**: Provide realistic data for analysis exercises and transparency system understanding.
|
109
|
+
|
110
|
+
- **Testing & Validation**: Create test datasets for transparency system development without requiring real user data.
|
111
|
+
|
112
|
+
- **Algorithm Development**: Build transparency tools using standard data formats and proven utilities.
|
113
|
+
|
114
|
+
## Package Structure
|
115
|
+
|
116
|
+
```
|
117
|
+
ci.transparency.sdk/
|
118
|
+
├── cli/ # Command-line interfaces
|
119
|
+
├── digests.py # Content fingerprinting (SimHash64, MinHashSig, Digests)
|
120
|
+
├── hash_core.py # Content identification (ContentHash)
|
121
|
+
├── ids.py # ID management (EventId, HashId, TopicId, WorldId)
|
122
|
+
├── io_schema.py # Serialization utilities (JSON conversion, dumps, loads)
|
123
|
+
└── window_agg.py # Window aggregation data structures (TopHash, WindowAgg)
|
124
|
+
```
|
125
|
+
|
126
|
+
## Security Model
|
127
|
+
|
128
|
+
This package provides foundational building blocks for transparency applications.
|
129
|
+
It does not include:
|
130
|
+
- Detection algorithms or thresholds
|
131
|
+
- Verification workflows or assessment criteria
|
132
|
+
- Specific patterns that trigger alerts
|
133
|
+
|
134
|
+
Detection logic and verification tools are maintained separately to prevent adversarial use while enabling legitimate transparency system development.
|
135
|
+
|
136
|
+
## Documentation
|
137
|
+
|
138
|
+
Comprehensive docs are published with [MkDocs Material](https://squidfunk.github.io/mkdocs-material/):
|
139
|
+
|
140
|
+
- **[Home](https://civic-interconnect.github.io/civic-transparency-sdk/)** – project overview and installation instructions
|
141
|
+
- **[Usage Guide](https://civic-interconnect.github.io/civic-transparency-sdk/usage/)** – quick-start workflow and common tasks
|
142
|
+
- **[CLI Reference](https://civic-interconnect.github.io/civic-transparency-sdk/cli/)** – full command-line interface details
|
143
|
+
- **[SDK Reference](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/overview/)** – core Python APIs
|
144
|
+
- [Window Aggregation](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/window_agg/)
|
145
|
+
- [Content Hashing](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/hash_core/)
|
146
|
+
- [Content Digests](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/digests/)
|
147
|
+
- [ID Management](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/ids/)
|
148
|
+
- [I/O Schema](https://civic-interconnect.github.io/civic-transparency-sdk/sdk/io_schema/)
|
149
|
+
- **[Schema Reference](https://civic-interconnect.github.io/civic-transparency-sdk/schema/)** – database schema and integration notes
|
150
|
+
- **Related Projects**
|
151
|
+
- [Civic Transparency Spec](https://civic-interconnect.github.io/civic-transparency-spec/)
|
152
|
+
- [Civic Transparency Types](https://civic-interconnect.github.io/civic-transparency-types/)
|
153
|
+
|
154
|
+
|
155
|
+
## License
|
156
|
+
|
157
|
+
See LICENSE file for details.
|
@@ -0,0 +1,14 @@
|
|
1
|
+
ci/transparency/sdk/__init__.py,sha256=47X_7MRUUyLpXTpt1-6NHAIpkF_oIjlNvt1-dShoRog,525
|
2
|
+
ci/transparency/sdk/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
|
3
|
+
ci/transparency/sdk/digests.py,sha256=GFiPdRdNxnws3mRqkrgxfwVpaMp_pxYHHvWbHdVlvNI,519
|
4
|
+
ci/transparency/sdk/hash_core.py,sha256=gdk5yExJRUek2gDjY7ZzQguqJ9tYnk_NxG9docWj52U,254
|
5
|
+
ci/transparency/sdk/ids.py,sha256=kcWRvlFbD4lF_8u0no36x-Ti46rBW4ovSC9qo6CS3kE,937
|
6
|
+
ci/transparency/sdk/io_schema.py,sha256=fToEqjMFGR1iEaJbg84hGMa32cyl8S16kYrSXDSgIIg,3574
|
7
|
+
ci/transparency/sdk/window_agg.py,sha256=Q_IsfsTqQJ9IaQ33JO5Ho_DHyT3g6Yp7u1bwXEw1Kt4,836
|
8
|
+
ci/transparency/sdk/cli/main.py,sha256=jHwPlfvOjGL1GGH5OMMdYxhOa9Q6my_zlsTFezRz72A,4915
|
9
|
+
civic_transparency_sdk-0.1.0.dist-info/licenses/LICENSE,sha256=k5rPGIMKa2A4ucUornVXNNnn4ODYJzw44y1lSSgNMmE,1083
|
10
|
+
civic_transparency_sdk-0.1.0.dist-info/METADATA,sha256=SogDY-JURGEyAS_T2kX2Z6FTAFRNyGoPSuCjqoChD_M,7316
|
11
|
+
civic_transparency_sdk-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
civic_transparency_sdk-0.1.0.dist-info/entry_points.txt,sha256=z22bc3Pqb-VLoVYbJTAqFJW9b0e5CJJbkzUcLPqHq_0,61
|
13
|
+
civic_transparency_sdk-0.1.0.dist-info/top_level.txt,sha256=4Yz6-7DIt5CedRfM7s3dxN7HstNIP9KBMBXro1MaVu0,3
|
14
|
+
civic_transparency_sdk-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Civic Interconnect
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the “Software”), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
ci
|