iocflow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iocflow-0.1.0/.github/workflows/ci.yml +42 -0
- iocflow-0.1.0/.github/workflows/release.yml +62 -0
- iocflow-0.1.0/.gitignore +11 -0
- iocflow-0.1.0/CHANGELOG.md +20 -0
- iocflow-0.1.0/LICENSE +21 -0
- iocflow-0.1.0/PKG-INFO +181 -0
- iocflow-0.1.0/README.md +127 -0
- iocflow-0.1.0/pyproject.toml +58 -0
- iocflow-0.1.0/scripts/bump.py +149 -0
- iocflow-0.1.0/src/iocflow/__init__.py +63 -0
- iocflow-0.1.0/src/iocflow/allowlists.py +177 -0
- iocflow-0.1.0/src/iocflow/cli.py +62 -0
- iocflow-0.1.0/src/iocflow/extract.py +99 -0
- iocflow-0.1.0/src/iocflow/extractors/__init__.py +35 -0
- iocflow-0.1.0/src/iocflow/extractors/actors.py +107 -0
- iocflow-0.1.0/src/iocflow/extractors/contacts.py +15 -0
- iocflow-0.1.0/src/iocflow/extractors/files.py +84 -0
- iocflow-0.1.0/src/iocflow/extractors/network.py +150 -0
- iocflow-0.1.0/src/iocflow/extractors/vulns.py +37 -0
- iocflow-0.1.0/src/iocflow/mitre.py +133 -0
- iocflow-0.1.0/src/iocflow/models.py +157 -0
- iocflow-0.1.0/src/iocflow/providers.py +91 -0
- iocflow-0.1.0/src/iocflow/refang.py +27 -0
- iocflow-0.1.0/tests/test_cli.py +27 -0
- iocflow-0.1.0/tests/test_extract.py +238 -0
- iocflow-0.1.0/tests/test_providers_and_mitre.py +86 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main"]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
# Cancel an in-progress run when a newer commit is pushed to the same ref.
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ci-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
lint:
|
|
18
|
+
name: Lint (ruff)
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v5
|
|
22
|
+
- uses: actions/setup-python@v6
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.12"
|
|
25
|
+
- run: pip install ruff
|
|
26
|
+
- run: ruff check .
|
|
27
|
+
|
|
28
|
+
test:
|
|
29
|
+
name: Test (py${{ matrix.python-version }})
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
strategy:
|
|
32
|
+
fail-fast: false
|
|
33
|
+
matrix:
|
|
34
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v5
|
|
37
|
+
- uses: actions/setup-python@v6
|
|
38
|
+
with:
|
|
39
|
+
python-version: ${{ matrix.python-version }}
|
|
40
|
+
- run: python -m pip install --upgrade pip
|
|
41
|
+
- run: pip install -e ".[dev]"
|
|
42
|
+
- run: python -m pytest -q
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Publishes to PyPI when a version tag (e.g. v0.1.0) is pushed.
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — no API token is stored anywhere.
|
|
5
|
+
on:
|
|
6
|
+
push:
|
|
7
|
+
tags:
|
|
8
|
+
- "v*"
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
test:
|
|
15
|
+
name: Test (py${{ matrix.python-version }})
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
strategy:
|
|
18
|
+
matrix:
|
|
19
|
+
# Floor and ceiling of the supported range, so a tag can't publish
|
|
20
|
+
# something that breaks on the requires-python lower bound.
|
|
21
|
+
python-version: ["3.9", "3.12"]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v5
|
|
24
|
+
- uses: actions/setup-python@v6
|
|
25
|
+
with:
|
|
26
|
+
python-version: ${{ matrix.python-version }}
|
|
27
|
+
- run: python -m pip install --upgrade pip
|
|
28
|
+
- run: pip install -e ".[dev]"
|
|
29
|
+
- run: python -m pytest -q
|
|
30
|
+
|
|
31
|
+
build:
|
|
32
|
+
name: Build distribution
|
|
33
|
+
needs: test
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v5
|
|
37
|
+
- uses: actions/setup-python@v6
|
|
38
|
+
with:
|
|
39
|
+
python-version: "3.x"
|
|
40
|
+
- run: python -m pip install --upgrade build
|
|
41
|
+
- run: python -m build
|
|
42
|
+
- run: python -m pip install --upgrade twine && python -m twine check dist/*
|
|
43
|
+
- uses: actions/upload-artifact@v4
|
|
44
|
+
with:
|
|
45
|
+
name: dist
|
|
46
|
+
path: dist/
|
|
47
|
+
|
|
48
|
+
publish:
|
|
49
|
+
name: Publish to PyPI
|
|
50
|
+
needs: build
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
environment:
|
|
53
|
+
name: pypi
|
|
54
|
+
url: https://pypi.org/project/iocflow/
|
|
55
|
+
permissions:
|
|
56
|
+
id-token: write # required for trusted publishing (OIDC)
|
|
57
|
+
steps:
|
|
58
|
+
- uses: actions/download-artifact@v4
|
|
59
|
+
with:
|
|
60
|
+
name: dist
|
|
61
|
+
path: dist/
|
|
62
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
iocflow-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## Unreleased
|
|
4
|
+
|
|
5
|
+
## 0.1.0 (2026-05-30)
|
|
6
|
+
|
|
7
|
+
- Initial release — Layer 1: threat-entity extraction.
|
|
8
|
+
- `extract(text)` pulls IPs, domains, URLs, filenames, hashes (MD5/SHA1/SHA256),
|
|
9
|
+
CVEs, emails, MITRE technique IDs, threat actors, and malware families from
|
|
10
|
+
unstructured text.
|
|
11
|
+
- `refang_text` re-fangs defanged IOCs (`[.]`, `[at]`, `hxxp`, …) before extraction.
|
|
12
|
+
- Domain validation via `tldextract` (Mozilla Public Suffix List); broad
|
|
13
|
+
benign-domain / benign-IP allowlists; three-layer malware false-positive defense.
|
|
14
|
+
- Pluggable enrichment sources: `MalwareNames` and `ActorAliases` — supply your
|
|
15
|
+
own name sets; the core has no external-data dependency and works fully without them.
|
|
16
|
+
- Optional `iocflow[mitre]` extra: `mitre.mitre_malware_names()` fetches the public
|
|
17
|
+
MITRE ATT&CK STIX bundle and returns a ready-made `MalwareNames` (7-day disk cache).
|
|
18
|
+
- `ExtractedEntities.iter_indicators()` yields flat `(kind, value)` indicators —
|
|
19
|
+
the input surface for future enrichment layers.
|
|
20
|
+
- `iocflow` CLI / `python -m iocflow` with `--json`, `--no-refang`, `--mitre`.
|
iocflow-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vinay Vobbilichetty
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
iocflow-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iocflow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract threat indicators (IOCs) from unstructured text — IPs, domains, URLs, hashes, CVEs, MITRE techniques, threat actors, and malware families. Layer 1 of an IOC-lifecycle toolkit.
|
|
5
|
+
Project-URL: Homepage, https://github.com/vinayvobbili/iocflow
|
|
6
|
+
Project-URL: Repository, https://github.com/vinayvobbili/iocflow
|
|
7
|
+
Project-URL: Issues, https://github.com/vinayvobbili/iocflow/issues
|
|
8
|
+
Author-email: Vinay Vobbilichetty <vinayvobbilichetty11@gmail.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Vinay Vobbilichetty
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: cve,cybersecurity,dfir,extraction,indicators-of-compromise,ioc,malware,mitre-attack,threat-actor,threat-intelligence
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: Intended Audience :: Information Technology
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Topic :: Security
|
|
42
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
43
|
+
Requires-Python: >=3.9
|
|
44
|
+
Requires-Dist: tldextract>=3.4
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
48
|
+
Requires-Dist: requests>=2.25; extra == 'dev'
|
|
49
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
50
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
51
|
+
Provides-Extra: mitre
|
|
52
|
+
Requires-Dist: requests>=2.25; extra == 'mitre'
|
|
53
|
+
Description-Content-Type: text/markdown
|
|
54
|
+
|
|
55
|
+
# iocflow
|
|
56
|
+
|
|
57
|
+
[](https://github.com/vinayvobbili/iocflow/actions/workflows/ci.yml)
|
|
58
|
+
[](https://pypi.org/project/iocflow/)
|
|
59
|
+
[](https://pypi.org/project/iocflow/)
|
|
60
|
+
[](https://github.com/vinayvobbili/iocflow/blob/main/LICENSE)
|
|
61
|
+
|
|
62
|
+
Pull **indicators of compromise** out of unstructured text — threat-intel
|
|
63
|
+
reports, advisories, emails, tickets — in one call. iocflow extracts IPs,
|
|
64
|
+
domains, URLs, filenames, file hashes, CVEs, MITRE ATT&CK technique IDs, threat
|
|
65
|
+
actors, and malware families, with the false-positive defenses you'd otherwise
|
|
66
|
+
write by hand: a Public Suffix List domain validator, benign-domain/IP
|
|
67
|
+
allowlists, hash de-duplication across MD5/SHA1/SHA256, and re-fanging of
|
|
68
|
+
defanged IOCs.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from iocflow import extract
|
|
72
|
+
|
|
73
|
+
text = """
|
|
74
|
+
APT28 (a.k.a. Fancy Bear) staged Cobalt Strike from evil-domain[.]ru and
|
|
75
|
+
185.220.101.5, dropping install.ps1 (MD5 a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4).
|
|
76
|
+
Exploited CVE-2021-44228 via T1190. Contact: ops@evil-domain[.]ru.
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
entities = extract(text)
|
|
80
|
+
print(entities.summary())
|
|
81
|
+
# 1 IPs, 1 domains, 1 filenames, 1 hashes, 1 CVEs, 1 emails, 1 threat actors, 1 MITRE techniques
|
|
82
|
+
|
|
83
|
+
for ind in entities.iter_indicators():
|
|
84
|
+
print(ind.kind, ind.value)
|
|
85
|
+
# ip 185.220.101.5
|
|
86
|
+
# domain evil-domain.ru
|
|
87
|
+
# ...
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The defanged `evil-domain[.]ru` and `ops@evil-domain[.]ru` are re-fanged
|
|
91
|
+
automatically; `185.220.101.5` is kept while private/benign IPs are dropped.
|
|
92
|
+
|
|
93
|
+
## Install
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
pip install iocflow # core — one dependency (tldextract)
|
|
97
|
+
pip install "iocflow[mitre]" # + a ready-made MITRE ATT&CK malware-name source
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## What it extracts
|
|
101
|
+
|
|
102
|
+
`extract(text)` returns an `ExtractedEntities` with:
|
|
103
|
+
|
|
104
|
+
- `ips` — public IPv4, excluding private ranges, benign IPs, and version-number-like values
|
|
105
|
+
- `domains` — validated against the Mozilla Public Suffix List via `tldextract`
|
|
106
|
+
- `urls` — both `https://…` and bare `host/path` forms (so package-registry paths survive)
|
|
107
|
+
- `filenames` — suspicious script/executable/macro/archive filenames
|
|
108
|
+
- `hashes` — `{"md5": [...], "sha1": [...], "sha256": [...]}`, de-duplicated across lengths
|
|
109
|
+
- `cves` — `CVE-YYYY-NNNN+`, normalized to uppercase
|
|
110
|
+
- `emails`
|
|
111
|
+
- `mitre_techniques` — `T1059`, `T1059.001`, …
|
|
112
|
+
- `threat_actors` (+ `threat_actors_enriched`) — APT/UNC/FIN/TA/DEV/STORM designators,
|
|
113
|
+
a curated well-known list, and the `"<Name> ransomware"` pattern
|
|
114
|
+
- `malware_families` — populated when you supply a malware-name source (see below)
|
|
115
|
+
|
|
116
|
+
Each individual extractor is also importable and composable:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from iocflow import extract_ips, extract_hashes, refang_text
|
|
120
|
+
extract_ips(refang_text("c2 at 185[.]220[.]101[.]5")) # ['185.220.101.5']
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Pluggable name sources
|
|
124
|
+
|
|
125
|
+
The core has **no external-data dependency**. Two enrichment sources are
|
|
126
|
+
optional and supplied by you, so iocflow drops cleanly into any environment —
|
|
127
|
+
plug in your own feeds, or use the bundled MITRE extra.
|
|
128
|
+
|
|
129
|
+
**Malware families.** Give `extract` a `MalwareNames` and it matches families
|
|
130
|
+
(with alias-to-canonical normalization) behind a three-layer false-positive
|
|
131
|
+
defense. Build one from your own list, from MITRE-shaped records, or from the
|
|
132
|
+
optional extra:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from iocflow import extract, MalwareNames
|
|
136
|
+
|
|
137
|
+
# Your own list:
|
|
138
|
+
names = MalwareNames.from_names(["Cobalt Strike", "Emotet", "Qakbot"])
|
|
139
|
+
entities = extract(report_text, malware_names=names)
|
|
140
|
+
|
|
141
|
+
# Or the bundled MITRE ATT&CK source (needs: pip install "iocflow[mitre]"):
|
|
142
|
+
from iocflow.mitre import mitre_malware_names
|
|
143
|
+
entities = extract(report_text, malware_names=mitre_malware_names())
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**Threat-actor aliases.** Give `extract` an `ActorAliases` to match a custom
|
|
147
|
+
name set and enrich actors with `common_name` / `region` / `all_names`. Without
|
|
148
|
+
it, actors are still found by pattern and curated list:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from iocflow import extract, ActorAliases
|
|
152
|
+
|
|
153
|
+
aliases = ActorAliases.from_index({
|
|
154
|
+
"apt28": {"common_name": "APT28", "region": "Russia",
|
|
155
|
+
"all_names": ["Fancy Bear", "Sofacy", "Sednit"]},
|
|
156
|
+
})
|
|
157
|
+
entities = extract(report_text, actor_aliases=aliases)
|
|
158
|
+
entities.threat_actors_enriched[0].region # "Russia"
|
|
159
|
+
entities.threat_actors_enriched[0].aliases_display() # "Fancy Bear, Sofacy, Sednit"
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Command line
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
iocflow "APT28 used 185.220.101.5 and evil[.]example[.]com"
|
|
166
|
+
echo "report text…" | iocflow --json
|
|
167
|
+
iocflow --mitre "Emotet dropped Cobalt Strike" # needs iocflow[mitre]
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Where this is going
|
|
171
|
+
|
|
172
|
+
iocflow is **Layer 1** of an IOC-lifecycle toolkit. The plan is to grow it in
|
|
173
|
+
independently-useful layers, each behind its own pip extra: enrichment
|
|
174
|
+
(VirusTotal, Recorded Future, AbuseIPDB, Shodan, abuse.ch), AI commentary,
|
|
175
|
+
suggested hunts, and optional perimeter blocking — each configured by plugging
|
|
176
|
+
in your own API keys. `ExtractedEntities` (and its `iter_indicators()` view) is
|
|
177
|
+
the stable hand-off type those layers consume.
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
MIT
|
iocflow-0.1.0/README.md
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# iocflow
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/vinayvobbili/iocflow/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI package](https://pypi.org/project/iocflow/)
|
|
5
|
+
[Supported Python versions](https://pypi.org/project/iocflow/)
|
|
6
|
+
[License: MIT](https://github.com/vinayvobbili/iocflow/blob/main/LICENSE)
|
|
7
|
+
|
|
8
|
+
Pull **indicators of compromise** out of unstructured text — threat-intel
|
|
9
|
+
reports, advisories, emails, tickets — in one call. iocflow extracts IPs,
|
|
10
|
+
domains, URLs, filenames, file hashes, CVEs, MITRE ATT&CK technique IDs, threat
|
|
11
|
+
actors, and malware families, with the false-positive defenses you'd otherwise
|
|
12
|
+
write by hand: a Public Suffix List domain validator, benign-domain/IP
|
|
13
|
+
allowlists, hash de-duplication across MD5/SHA1/SHA256, and re-fanging of
|
|
14
|
+
defanged IOCs.
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
from iocflow import extract
|
|
18
|
+
|
|
19
|
+
text = """
|
|
20
|
+
APT28 (a.k.a. Fancy Bear) staged Cobalt Strike from evil-domain[.]ru and
|
|
21
|
+
185.220.101.5, dropping install.ps1 (MD5 a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4).
|
|
22
|
+
Exploited CVE-2021-44228 via T1190. Contact: ops@evil-domain[.]ru.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
entities = extract(text)
|
|
26
|
+
print(entities.summary())
|
|
27
|
+
# 1 IPs, 1 domains, 1 filenames, 1 hashes, 1 CVEs, 1 emails, 1 threat actors, 1 MITRE techniques
|
|
28
|
+
|
|
29
|
+
for ind in entities.iter_indicators():
|
|
30
|
+
print(ind.kind, ind.value)
|
|
31
|
+
# ip 185.220.101.5
|
|
32
|
+
# domain evil-domain.ru
|
|
33
|
+
# ...
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
The defanged `evil-domain[.]ru` and `ops@evil-domain[.]ru` are re-fanged
|
|
37
|
+
automatically; `185.220.101.5` is kept while private/benign IPs are dropped.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install iocflow # core — one dependency (tldextract)
|
|
43
|
+
pip install "iocflow[mitre]" # + a ready-made MITRE ATT&CK malware-name source
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## What it extracts
|
|
47
|
+
|
|
48
|
+
`extract(text)` returns an `ExtractedEntities` with:
|
|
49
|
+
|
|
50
|
+
- `ips` — public IPv4, excluding private ranges, benign IPs, and version-number-like values
|
|
51
|
+
- `domains` — validated against the Mozilla Public Suffix List via `tldextract`
|
|
52
|
+
- `urls` — both `https://…` and bare `host/path` forms (so package-registry paths survive)
|
|
53
|
+
- `filenames` — suspicious script/executable/macro/archive filenames
|
|
54
|
+
- `hashes` — `{"md5": [...], "sha1": [...], "sha256": [...]}`, de-duplicated across lengths
|
|
55
|
+
- `cves` — `CVE-YYYY-NNNN+`, normalized to uppercase
|
|
56
|
+
- `emails`
|
|
57
|
+
- `mitre_techniques` — `T1059`, `T1059.001`, …
|
|
58
|
+
- `threat_actors` (+ `threat_actors_enriched`) — APT/UNC/FIN/TA/DEV/STORM designators,
|
|
59
|
+
a curated well-known list, and the `"<Name> ransomware"` pattern
|
|
60
|
+
- `malware_families` — populated when you supply a malware-name source (see below)
|
|
61
|
+
|
|
62
|
+
Each individual extractor is also importable and composable:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from iocflow import extract_ips, extract_hashes, refang_text
|
|
66
|
+
extract_ips(refang_text("c2 at 185[.]220[.]101[.]5")) # ['185.220.101.5']
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Pluggable name sources
|
|
70
|
+
|
|
71
|
+
The core has **no external-data dependency**. Two enrichment sources are
|
|
72
|
+
optional and supplied by you, so iocflow drops cleanly into any environment —
|
|
73
|
+
plug in your own feeds, or use the bundled MITRE extra.
|
|
74
|
+
|
|
75
|
+
**Malware families.** Give `extract` a `MalwareNames` and it matches families
|
|
76
|
+
(with alias-to-canonical normalization) behind a three-layer false-positive
|
|
77
|
+
defense. Build one from your own list, from MITRE-shaped records, or from the
|
|
78
|
+
optional extra:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from iocflow import extract, MalwareNames
|
|
82
|
+
|
|
83
|
+
# Your own list:
|
|
84
|
+
names = MalwareNames.from_names(["Cobalt Strike", "Emotet", "Qakbot"])
|
|
85
|
+
entities = extract(report_text, malware_names=names)
|
|
86
|
+
|
|
87
|
+
# Or the bundled MITRE ATT&CK source (needs: pip install "iocflow[mitre]"):
|
|
88
|
+
from iocflow.mitre import mitre_malware_names
|
|
89
|
+
entities = extract(report_text, malware_names=mitre_malware_names())
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**Threat-actor aliases.** Give `extract` an `ActorAliases` to match a custom
|
|
93
|
+
name set and enrich actors with `common_name` / `region` / `all_names`. Without
|
|
94
|
+
it, actors are still found by pattern and curated list:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from iocflow import extract, ActorAliases
|
|
98
|
+
|
|
99
|
+
aliases = ActorAliases.from_index({
|
|
100
|
+
"apt28": {"common_name": "APT28", "region": "Russia",
|
|
101
|
+
"all_names": ["Fancy Bear", "Sofacy", "Sednit"]},
|
|
102
|
+
})
|
|
103
|
+
entities = extract(report_text, actor_aliases=aliases)
|
|
104
|
+
entities.threat_actors_enriched[0].region # "Russia"
|
|
105
|
+
entities.threat_actors_enriched[0].aliases_display() # "Fancy Bear, Sofacy, Sednit"
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Command line
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
iocflow "APT28 used 185.220.101.5 and evil[.]example[.]com"
|
|
112
|
+
echo "report text…" | iocflow --json
|
|
113
|
+
iocflow --mitre "Emotet dropped Cobalt Strike" # needs iocflow[mitre]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Where this is going
|
|
117
|
+
|
|
118
|
+
iocflow is **Layer 1** of an IOC-lifecycle toolkit. The plan is to grow it in
|
|
119
|
+
independently-useful layers, each behind its own pip extra: enrichment
|
|
120
|
+
(VirusTotal, Recorded Future, AbuseIPDB, Shodan, abuse.ch), AI commentary,
|
|
121
|
+
suggested hunts, and optional perimeter blocking — each configured by plugging
|
|
122
|
+
in your own API keys. `ExtractedEntities` (and its `iter_indicators()` view) is
|
|
123
|
+
the stable hand-off type those layers consume.
|
|
124
|
+
|
|
125
|
+
## License
|
|
126
|
+
|
|
127
|
+
MIT
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "iocflow"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Extract threat indicators (IOCs) from unstructured text — IPs, domains, URLs, hashes, CVEs, MITRE techniques, threat actors, and malware families. Layer 1 of an IOC-lifecycle toolkit."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { file = "LICENSE" }
|
|
12
|
+
authors = [{ name = "Vinay Vobbilichetty", email = "vinayvobbilichetty11@gmail.com" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"ioc", "indicators-of-compromise", "threat-intelligence", "cybersecurity",
|
|
15
|
+
"extraction", "mitre-attack", "cve", "threat-actor", "malware", "dfir",
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Intended Audience :: Information Technology",
|
|
21
|
+
"License :: OSI Approved :: MIT License",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: Security",
|
|
28
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"tldextract>=3.4",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
# Bundled MITRE ATT&CK malware-name provider (fetches the public STIX bundle).
|
|
36
|
+
mitre = ["requests>=2.25"]
|
|
37
|
+
dev = [
|
|
38
|
+
"requests>=2.25",
|
|
39
|
+
"pytest>=7",
|
|
40
|
+
"ruff>=0.4",
|
|
41
|
+
"build>=1.0",
|
|
42
|
+
"twine>=5.0",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.scripts]
|
|
46
|
+
iocflow = "iocflow.cli:main"
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/vinayvobbili/iocflow"
|
|
50
|
+
Repository = "https://github.com/vinayvobbili/iocflow"
|
|
51
|
+
Issues = "https://github.com/vinayvobbili/iocflow/issues"
|
|
52
|
+
|
|
53
|
+
[tool.hatch.build.targets.wheel]
|
|
54
|
+
packages = ["src/iocflow"]
|
|
55
|
+
|
|
56
|
+
[tool.ruff]
|
|
57
|
+
line-length = 100
|
|
58
|
+
target-version = "py39"
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Bump the version, update the changelog, commit, and tag a release.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python scripts/bump.py patch # 0.1.0 -> 0.1.1
|
|
6
|
+
python scripts/bump.py minor # 0.1.0 -> 0.2.0
|
|
7
|
+
python scripts/bump.py major # 0.1.0 -> 1.0.0
|
|
8
|
+
python scripts/bump.py 0.5.0 # set an explicit version
|
|
9
|
+
python scripts/bump.py patch --push # also push main + the tag (triggers the PyPI publish)
|
|
10
|
+
|
|
11
|
+
What it does, in order (not transactional: if a git step fails, the edited files stay modified in the working tree):
|
|
12
|
+
1. Bumps `version` in pyproject.toml AND `__version__` in the package __init__
|
|
13
|
+
(the two must never drift apart).
|
|
14
|
+
2. Promotes the CHANGELOG "## Unreleased" section to "## X.Y.Z (today)" and
|
|
15
|
+
opens a fresh empty "## Unreleased" above it.
|
|
16
|
+
3. Commits "release: vX.Y.Z" and creates an annotated tag vX.Y.Z.
|
|
17
|
+
|
|
18
|
+
Pushing the tag is what triggers .github/workflows/release.yml, which tests,
|
|
19
|
+
builds, and publishes to PyPI via trusted publishing (no token).
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import datetime
|
|
25
|
+
import pathlib
|
|
26
|
+
import re
|
|
27
|
+
import subprocess
|
|
28
|
+
import sys
|
|
29
|
+
|
|
30
|
+
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
|
31
|
+
PYPROJECT = ROOT / "pyproject.toml"
|
|
32
|
+
INIT = ROOT / "src" / "iocflow" / "__init__.py"
|
|
33
|
+
CHANGELOG = ROOT / "CHANGELOG.md"
|
|
34
|
+
|
|
35
|
+
SEMVER = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def run(*args: str) -> str:
    """Run a command from the repository root and return its captured stdout.

    Args:
        *args: The program followed by its arguments, passed as an argv
            sequence (no shell is involved).

    Returns:
        Everything the command wrote to stdout, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero. The
            captured stderr is echoed to this process's stderr first, because
            the exception message itself only reports the exit status.
    """
    try:
        completed = subprocess.run(
            args, cwd=ROOT, check=True, text=True, capture_output=True
        )
    except subprocess.CalledProcessError as exc:
        # capture_output=True would otherwise hide the command's own
        # explanation of why it failed (e.g. a rejected push or commit hook).
        if exc.stderr:
            sys.stderr.write(exc.stderr)
        raise
    return completed.stdout
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def die(msg: str) -> "None":
    """Abort the script, using ``msg`` prefixed with ``bump: `` as the exit message.

    This never returns normally: it always raises ``SystemExit``.
    """
    raise SystemExit(f"bump: {msg}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def current_version() -> str:
    """Return the version string currently declared in pyproject.toml.

    The value is taken from the first line of the file that starts with
    ``version = "..."``. Aborts via ``die`` when no such line exists.
    """
    # pyproject.toml is UTF-8 and holds non-ASCII text (the description has an
    # em dash), so do not rely on the platform's locale encoding to read it.
    text = PYPROJECT.read_text(encoding="utf-8")
    m = re.search(r'^version\s*=\s*"([^"]+)"', text, re.M)
    if not m:
        die("could not find `version = \"...\"` in pyproject.toml")
    return m.group(1)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def next_version(cur: str, part: str) -> str:
    """Return ``cur`` with its major, minor, or patch component incremented.

    Args:
        cur: The current version; it must match the module-level ``SEMVER``
            pattern (``X.Y.Z``), otherwise the script aborts via ``die``.
        part: ``"major"``, ``"minor"``, or ``"patch"``; any other value
            raises ``KeyError``.

    Returns:
        The bumped version, with every component below the bumped one reset
        to zero.
    """
    parsed = SEMVER.match(cur)
    if parsed is None:
        die(f"current version {cur!r} is not X.Y.Z")
    major, minor, patch = map(int, parsed.groups())
    bumped = {
        "major": f"{major + 1}.0.0",
        "minor": f"{major}.{minor + 1}.0",
        "patch": f"{major}.{minor}.{patch + 1}",
    }
    return bumped[part]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def ensure_releasable() -> None:
    """Abort unless the working tree is clean and the checked-out branch is ``main``.

    The cleanliness check runs first; either failure ends the script via ``die``.
    """
    porcelain = run("git", "status", "--porcelain").strip()
    if porcelain != "":
        die("working tree is not clean; commit or stash first:\n" + porcelain)

    head = run("git", "rev-parse", "--abbrev-ref", "HEAD").strip()
    if head == "main":
        return
    die(f"on branch {head!r}; release from main")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def replace_once(path: pathlib.Path, pattern: str, replacement: str) -> None:
    """Rewrite ``path`` in place, substituting the single match of ``pattern``.

    Args:
        path: The file to edit; it is read and written as UTF-8.
        pattern: Regular expression, applied with ``re.M`` so ``^``/``$``
            anchor at line boundaries.
        replacement: Replacement template, as accepted by ``re.sub``.

    The script aborts via ``die`` (leaving the file untouched) unless the
    pattern matches exactly once.
    """
    text = path.read_text(encoding="utf-8")
    regex = re.compile(pattern, re.M)
    # Count every match up front. A substitution capped at count=1 can only
    # ever report 0 or 1, so it would silently rewrite the first of several
    # matches instead of flagging the ambiguity.
    n = sum(1 for _ in regex.finditer(text))
    if n != 1:
        die(f"expected exactly one match for {pattern!r} in {path.name} (found {n})")
    path.write_text(regex.sub(replacement, text, count=1), encoding="utf-8")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def roll_changelog(new: str) -> None:
    """Record release ``new`` in CHANGELOG.md, dated today.

    If the file has a ``## Unreleased`` heading (matched case-insensitively),
    a ``## <new> (<today>)`` heading is inserted directly beneath it, so the
    entries that were listed as unreleased now belong to the new version and
    ``## Unreleased`` is left empty above it.

    Otherwise a stub section with a TODO entry is inserted after the
    ``# Changelog`` title and any blank lines that follow it; if there is no
    such title either, the title and the stub are prepended to the file.

    Args:
        new: The version being released, e.g. ``"0.2.0"``.
    """
    text = CHANGELOG.read_text(encoding="utf-8")
    today = datetime.date.today().isoformat()
    # Use [ \t]* rather than \s* for the trailing whitespace: under re.M, \s*
    # also consumes the newline that ends the heading line, which drops the
    # blank line between the new version heading and the content below it.
    unreleased = re.compile(r"^##[ \t]+Unreleased[ \t]*$", re.M | re.I)
    if unreleased.search(text):
        # Promote Unreleased -> the new version, and open a fresh Unreleased above.
        text = unreleased.sub(f"## Unreleased\n\n## {new} ({today})", text, count=1)
    else:
        # No Unreleased section: insert a stub right under the top heading.
        stub = f"## {new} ({today})\n\n- TODO: describe changes.\n\n"
        lines = text.splitlines(keepends=True)
        for i, line in enumerate(lines):
            if line.lstrip().lower().startswith("# changelog"):
                j = i + 1
                # Skip the blank lines that separate the title from the body.
                while j < len(lines) and not lines[j].strip():
                    j += 1
                lines.insert(j, stub)
                break
        else:
            # No title found at all: create one together with the stub.
            lines = ["# Changelog\n\n" + stub, *lines]
        text = "".join(lines)
    CHANGELOG.write_text(text, encoding="utf-8")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def main() -> None:
    """Command-line entry point: bump the version, roll the changelog, commit, tag.

    Steps, in order: parse arguments, check the repository is releasable,
    work out the new version (from ``patch``/``minor``/``major`` or an
    explicit ``X.Y.Z``), rewrite pyproject.toml, the package ``__init__`` and
    the changelog, then commit and create an annotated tag. With ``--push``
    the ``main`` branch and the tag are pushed to ``origin``; otherwise the
    push commands are printed as a reminder.
    """
    parser = argparse.ArgumentParser(description="Bump version, update changelog, commit, tag.")
    parser.add_argument("version", help="patch | minor | major | explicit X.Y.Z")
    parser.add_argument(
        "--push",
        action="store_true",
        help="also push main and the tag (triggers the PyPI publish workflow)",
    )
    opts = parser.parse_args()

    ensure_releasable()
    previous = current_version()
    requested = opts.version
    if requested in ("patch", "minor", "major"):
        target = next_version(previous, requested)
    elif SEMVER.match(requested) is not None:
        target = requested
    else:
        die("version must be patch|minor|major or X.Y.Z")
    if target == previous:
        die(f"new version equals current ({previous})")

    print(f"{previous} -> {target}")
    # Keep pyproject.toml and the package __version__ in lockstep.
    replace_once(PYPROJECT, r'^version\s*=\s*"[^"]+"', f'version = "{target}"')
    replace_once(INIT, r'^__version__\s*=\s*"[^"]+"', f'__version__ = "{target}"')
    roll_changelog(target)

    tag = f"v{target}"
    run("git", "add", "pyproject.toml", "src/iocflow/__init__.py", "CHANGELOG.md")
    run("git", "commit", "-m", f"release: {tag}")
    run("git", "tag", "-a", tag, "-m", f"iocflow {target}")
    print(f"committed + tagged {tag}")

    if not opts.push:
        print(f"next: git push origin main && git push origin {tag}")
        return
    run("git", "push", "origin", "main")
    run("git", "push", "origin", tag)
    print(f"pushed main + {tag} — the Release workflow will publish to PyPI")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
if __name__ == "__main__":
|
|
149
|
+
main()
|