dogesec-commons 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. dogesec_commons/__init__.py +0 -0
  2. dogesec_commons/asgi.py +16 -0
  3. dogesec_commons/objects/__init__.py +1 -0
  4. dogesec_commons/objects/apps.py +10 -0
  5. dogesec_commons/objects/conf.py +8 -0
  6. dogesec_commons/objects/db_view_creator.py +164 -0
  7. dogesec_commons/objects/helpers.py +660 -0
  8. dogesec_commons/objects/views.py +427 -0
  9. dogesec_commons/settings.py +161 -0
  10. dogesec_commons/stixifier/__init__.py +0 -0
  11. dogesec_commons/stixifier/apps.py +5 -0
  12. dogesec_commons/stixifier/conf.py +1 -0
  13. dogesec_commons/stixifier/migrations/0001_initial.py +36 -0
  14. dogesec_commons/stixifier/migrations/0002_profile_ai_content_check_variable.py +18 -0
  15. dogesec_commons/stixifier/migrations/0003_rename_ai_content_check_variable_profile_ai_content_check_provider_and_more.py +23 -0
  16. dogesec_commons/stixifier/migrations/0004_profile_identity_id.py +18 -0
  17. dogesec_commons/stixifier/migrations/0005_profile_generate_pdf.py +18 -0
  18. dogesec_commons/stixifier/migrations/__init__.py +0 -0
  19. dogesec_commons/stixifier/models.py +57 -0
  20. dogesec_commons/stixifier/serializers.py +192 -0
  21. dogesec_commons/stixifier/stixifier.py +252 -0
  22. dogesec_commons/stixifier/summarizer.py +62 -0
  23. dogesec_commons/stixifier/views.py +193 -0
  24. dogesec_commons/urls.py +45 -0
  25. dogesec_commons/utils/__init__.py +3 -0
  26. dogesec_commons/utils/autoschema.py +88 -0
  27. dogesec_commons/utils/exceptions.py +28 -0
  28. dogesec_commons/utils/filters.py +66 -0
  29. dogesec_commons/utils/ordering.py +47 -0
  30. dogesec_commons/utils/pagination.py +81 -0
  31. dogesec_commons/utils/schemas.py +27 -0
  32. dogesec_commons/utils/serializers.py +47 -0
  33. dogesec_commons/wsgi.py +16 -0
  34. dogesec_commons-1.0.2.dist-info/METADATA +57 -0
  35. dogesec_commons-1.0.2.dist-info/RECORD +37 -0
  36. dogesec_commons-1.0.2.dist-info/WHEEL +4 -0
  37. dogesec_commons-1.0.2.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,18 @@
1
+ # Generated by Django 5.1.5 on 2025-05-01 09:48
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
class Migration(migrations.Migration):
    """Add the nullable ``identity_id`` column to the ``profile`` model."""

    # Runs after the 0003 rename migration of the same app.
    dependencies = [
        (
            "dogesec_stixifier",
            "0003_rename_ai_content_check_variable_profile_ai_content_check_provider_and_more",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="profile",
            name="identity_id",
            # Existing rows get NULL; 46 characters fits ``identity--`` + a UUID.
            field=models.CharField(max_length=46, null=True, default=None),
        ),
    ]
@@ -0,0 +1,18 @@
1
+ # Generated by Django 5.1.5 on 2025-06-24 12:58
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
class Migration(migrations.Migration):
    """Add the ``generate_pdf`` boolean column to the ``profile`` model."""

    # Runs after the migration that introduced ``identity_id``.
    dependencies = [
        ("dogesec_stixifier", "0004_profile_identity_id"),
    ]

    operations = [
        migrations.AddField(
            model_name="profile",
            name="generate_pdf",
            # Existing rows are back-filled with ``False``.
            field=models.BooleanField(default=False),
        ),
    ]
File without changes
@@ -0,0 +1,57 @@
1
+ import txt2stix
2
+ from django.conf import settings
3
+ from django.db import models
4
+ from django.contrib.postgres.fields import ArrayField
5
+ import uuid
6
+ from functools import partial
7
+ import txt2stix.common
8
+ import txt2stix, txt2stix.extractions
9
+ from django.core.exceptions import ValidationError
10
+
11
+
12
class RelationshipMode(models.TextChoices):
    """Allowed values for ``Profile.relationship_mode``."""

    # (stored value, human readable label)
    AI = ("ai", "AI Relationship")
    STANDARD = ("standard", "Standard Relationship")
15
+
16
def validate_extractor(types, name):
    """Check that ``name`` is the slug of a txt2stix extractor of an allowed type.

    Args:
        types: Collection of acceptable extractor types.
        name: Extractor slug to look up.

    Returns:
        ``True`` when an extractor with this slug and an allowed type exists.

    Raises:
        ValidationError: If no such extractor is found.
    """
    known = txt2stix.extractions.parse_extraction_config(
        txt2stix.txt2stix.INCLUDES_PATH
    )
    found = any(
        name == candidate.slug and candidate.type in types
        for candidate in known.values()
    )
    if found:
        return True
    raise ValidationError(f"{name} does not exist", 400)
24
+
25
+
26
class Profile(models.Model):
    """Stored set of processing options used when turning a document into STIX.

    The primary key is not random: ``save`` derives it from the profile name
    (and the identity id, when present) so the same inputs give the same id.
    """

    id = models.UUIDField(primary_key=True)
    created = models.DateTimeField(auto_now_add=True)
    name = models.CharField(max_length=250, unique=True)
    identity_id = models.CharField(max_length=46, null=True, default=None)

    # Extraction / conversion options.
    extractions = ArrayField(base_field=models.CharField(max_length=256))
    relationship_mode = models.CharField(
        choices=RelationshipMode.choices,
        max_length=20,
        default=RelationshipMode.STANDARD,
    )
    extract_text_from_image = models.BooleanField(default=False)
    defang = models.BooleanField()
    generate_pdf = models.BooleanField(default=False)

    # AI provider settings.
    ai_settings_relationships = models.CharField(
        max_length=256, blank=False, null=True
    )
    ai_settings_extractions = ArrayField(
        base_field=models.CharField(max_length=256), default=list
    )
    ai_content_check_provider = models.CharField(
        default=None, null=True, blank=False, max_length=256
    )
    ai_summary_provider = models.CharField(max_length=256, blank=False, null=True)
    ai_create_attack_flow = models.BooleanField(default=True)

    # Input clean-up switches.
    ignore_image_refs = models.BooleanField(default=True)
    ignore_link_refs = models.BooleanField(default=True)
    ignore_extraction_boundary = models.BooleanField(default=False)

    # Embedded-relationship switches.
    ignore_embedded_relationships_sro = models.BooleanField(default=True)
    ignore_embedded_relationships_smo = models.BooleanField(default=True)
    ignore_embedded_relationships = models.BooleanField(default=False)

    def save(self, *args, **kwargs) -> None:
        """Assign a deterministic UUIDv5 id on first save, then persist."""
        if not self.id:
            # The identity id, when set, is part of the seed for the id.
            seed = (
                f"{self.name}+{self.identity_id}"
                if self.identity_id
                else self.name
            )
            self.id = uuid.uuid5(settings.STIXIFIER_NAMESPACE, seed)
        return super().save(*args, **kwargs)
@@ -0,0 +1,192 @@
1
+ import argparse
2
+ import contextlib
3
+ from functools import partial
4
+ import uuid
5
+ from rest_framework import serializers
6
+
7
+ from . import conf
8
+ from .models import Profile
9
+ from rest_framework import serializers
10
+ import txt2stix.extractions
11
+ import txt2stix.txt2stix
12
+ from urllib.parse import urljoin
13
+ from django.conf import settings
14
+ from django.contrib.postgres.fields import ArrayField
15
+ from rest_framework.validators import ValidationError
16
+ from dogesec_commons.utils.serializers import CommonErrorSerializer
17
+
18
+ from drf_spectacular.utils import OpenApiResponse, OpenApiExample
19
+
20
+ from drf_spectacular.utils import OpenApiResponse, OpenApiExample
21
+
22
+ from django.db import models
23
+
24
+
25
def validate_model(model):
    """Validate an AI model string by handing it to txt2stix's model parser.

    Args:
        model: Value to check. Falsy values (``None``, ``""``) mean "not
            configured" and are accepted without parsing.

    Returns:
        ``None`` when ``model`` is falsy, otherwise ``model`` unchanged.

    Raises:
        ValidationError: If ``txt2stix.txt2stix.parse_model`` fails for
            ``model``; the underlying error is chained as the cause.
    """
    if not model:
        return None
    try:
        txt2stix.txt2stix.parse_model(model)
    except Exception as e:
        # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt and
        # SystemExit are not reported as an invalid model.
        raise ValidationError(f"invalid model: {model}") from e
    return model
33
+
34
def validate_extractor(typestr, types, name):
    """Reject ``name`` unless it is a known extractor of an allowed type.

    Args:
        typestr: Human readable kind of thing being validated; used only in
            the error message.
        types: Collection of acceptable extractor types.
        name: Key to look up in the parsed txt2stix extraction config.

    Raises:
        ValidationError: If ``name`` is unknown or its type is not in ``types``.
    """
    registry = txt2stix.extractions.parse_extraction_config(
        txt2stix.txt2stix.INCLUDES_PATH
    )
    if name in registry and registry[name].type in types:
        return
    raise ValidationError(f"`{name}` is not a valid {typestr}", 400)
40
+
41
def validate_stix_id(stix_id: str, type: str):
    """Validate that ``stix_id`` looks like ``<type>--<uuid>``.

    Args:
        stix_id: Candidate STIX identifier.
        type: Expected text before the first ``--``.

    Returns:
        ``stix_id`` unchanged when it is valid.

    Raises:
        ValidationError: If the prefix differs from ``type`` or the remainder
            cannot be parsed as a UUID.
    """
    prefix, _sep, suffix = stix_id.partition("--")
    if prefix != type:
        raise ValidationError(f"Invalid STIX ID for type `{type}`")
    try:
        uuid.UUID(suffix)
    except Exception:
        # Any parsing failure falls through to the generic error below.
        pass
    else:
        return stix_id
    raise ValidationError("Invalid STIX ID")
49
+
50
+
51
def uses_ai(slugs):
    """Raise if any of ``slugs`` refers to an extractor of type ``ai``.

    Args:
        slugs: Keys into the parsed txt2stix extraction config; each must
            exist there.

    Raises:
        ValidationError: Listing every AI based extractor found in ``slugs``.
    """
    registry = txt2stix.extractions.parse_extraction_config(
        txt2stix.txt2stix.INCLUDES_PATH
    )
    ai_slugs = [slug for slug in slugs if registry[slug].type == 'ai']
    if not ai_slugs:
        return
    raise ValidationError(f'AI based extractors `{ai_slugs}` used when `ai_settings_extractions` is not configured')
62
+
63
class ProfileSerializer(serializers.ModelSerializer):
    """Serializer for ``Profile`` with cross-field checks of the AI settings.

    Field level validators check each model string and extractor id on its
    own; ``validate`` then enforces the combinations that need an AI model.
    """

    id = serializers.UUIDField(read_only=True)
    identity_id = serializers.CharField(
        max_length=46,
        validators=[lambda stix_id: validate_stix_id(stix_id, "identity")],
        allow_null=True,
        required=False,
        help_text="STIX Identity ID (e.g `identity--19686d47-3a50-48a0-8ef0-f3e0f8a4bd99`)"
    )

    ai_settings_relationships = serializers.CharField(
        validators=[validate_model],
        help_text='(required if AI relationship enabled): passed in format `provider:model`. Can only pass one model at this time.',
        allow_null=True,
        required=False,
    )
    ai_settings_extractions = serializers.ListField(
        child=serializers.CharField(max_length=256, validators=[validate_model]),
        help_text='(required if AI extractions enabled) passed in format provider[:model] e.g. openai:gpt4o. Can pass more than one value to get extractions from multiple providers. model part is optional',
        required=False,
    )
    ai_summary_provider = serializers.CharField(
        validators=[validate_model],
        help_text='you can optionally get an AI model to produce a summary of the blog. You must pass the request in format `provider:model`. model part is optional',
        allow_null=True,
        required=False,
    )
    ai_content_check_provider = serializers.CharField(
        max_length=256, validators=[validate_model],
        allow_null=True,
        required=False,
        help_text='check content before proceeding'
    )
    # The help text previously advertised `false`, contradicting default=True.
    ai_create_attack_flow = serializers.BooleanField(required=False, help_text="should create attack-flow (default is `true`)", default=True)
    extractions = serializers.ListField(
        min_length=1,
        child=serializers.CharField(max_length=256, validators=[partial(validate_extractor, 'extractor', ["ai", "pattern", "lookup"])]),
        help_text="extraction id(s)",
    )
    defang = serializers.BooleanField(help_text='If the text should be defanged before processing')

    ignore_embedded_relationships = serializers.BooleanField(required=False, help_text="applies to SDO and SCO types (default is `false`)")
    ignore_embedded_relationships_sro = serializers.BooleanField(required=False, help_text="sets wether to ignore embedded refs on `relationship` object types (default is `true`)")
    ignore_embedded_relationships_smo = serializers.BooleanField(required=False, help_text="sets wether to ignore embedded refs on SMO object types (`marking-definition`, `extension-definition`, `language-content`) (default is `true`)")
    generate_pdf = serializers.BooleanField(required=False, help_text="Whether or not to generate pdf file for input, applies to both stixify and obstracts (default is `false`)")

    class Meta:
        model = Profile
        fields = "__all__"

    def validate(self, attrs):
        """Enforce that AI features are only enabled with a model configured.

        Args:
            attrs: Validated field values. Optional fields the client did not
                send may be absent, so they are read with ``.get``.

        Returns:
            The result of the parent ``validate``.

        Raises:
            ValidationError: If AI relationships or attack-flow generation are
                requested without ``ai_settings_relationships``, or if AI
                based extractors are used without ``ai_settings_extractions``.
        """
        if not attrs.get('ai_settings_relationships'):
            # ``relationship_mode`` is optional on input; indexing it directly
            # raised KeyError (a server error) when it was omitted.
            if attrs.get('relationship_mode') == 'ai':
                raise ValidationError('`ai_settings_relationships` is required when `relationship_mode == "ai"`')
            # Absent only when field defaults are skipped (partial update).
            if attrs.get('ai_create_attack_flow', False):
                raise ValidationError('`ai_settings_relationships` is required when `ai_create_attack_flow == true`')
        if not attrs.get('ai_settings_extractions'):
            # No extractions submitted means nothing to check here.
            uses_ai(attrs.get('extractions', []))
        return super().validate(attrs)
123
+
124
+
125
+
126
# Reusable OpenAPI response object for a generic HTTP 400 error payload.
DEFAULT_400_ERROR = OpenApiResponse(
    response=CommonErrorSerializer,
    description="The server did not understand the request",
    examples=[
        OpenApiExample(
            "http400",
            value={"message": " The server did not understand the request", "code": 400},
        ),
    ],
)
136
+
137
+
138
# Reusable OpenAPI response object for a generic HTTP 404 error payload.
DEFAULT_404_ERROR = OpenApiResponse(
    response=CommonErrorSerializer,
    description="Resource not found",
    examples=[
        OpenApiExample(
            "http404",
            value={
                "message": "The server cannot find the resource you requested",
                "code": 404,
            },
        ),
    ],
)
151
+
152
+
153
+ ##
154
+
155
+
156
+
157
class Txt2stixExtractorSerializer(serializers.Serializer):
    """Schema for one txt2stix extractor, plus helpers that list extractors."""

    id = serializers.CharField(label='The `id` of the extractor')
    name = serializers.CharField()
    type = serializers.CharField()
    description = serializers.CharField(required=False, allow_null=True)
    notes = serializers.CharField(required=False, allow_null=True)
    file = serializers.CharField(required=False, allow_null=True)
    created = serializers.CharField(required=False, allow_null=True)
    modified = serializers.CharField(required=False, allow_null=True)
    created_by = serializers.CharField(required=False, allow_null=True)
    version = serializers.CharField()
    stix_mapping = serializers.CharField(required=False, allow_null=True)
    dogesec_web = serializers.BooleanField(required=False, allow_null=True)

    @classmethod
    def all_extractors(cls, types):
        """Return ``{slug: cleaned extractor}`` for extractors whose type is in ``types``.

        When an extractor has a ``file``, its ``"file"`` entry is replaced by
        a URL built from ``conf.TXT2STIX_INCLUDE_URL`` and the file's path
        relative to the txt2stix includes directory.
        """
        include_root = txt2stix.txt2stix.INCLUDES_PATH
        catalogue = {}
        parsed = txt2stix.extractions.parse_extraction_config(include_root)
        for extractor in parsed.values():
            if extractor.type not in types:
                continue
            entry = cls.cleanup_extractor(extractor)
            if extractor.file:
                relative_path = extractor.file.relative_to(include_root)
                entry["file"] = urljoin(conf.TXT2STIX_INCLUDE_URL, str(relative_path))
            catalogue[extractor.slug] = entry
        return catalogue

    @classmethod
    def cleanup_extractor(cls, dct: dict):
        """Project ``dct`` onto this serializer's fields, with ``id`` taken from ``slug``."""
        declared = cls(data={}).get_fields()
        cleaned = {"id": dct["slug"]}
        # Only keys that are both declared here and present in ``dct`` are copied.
        cleaned.update((key, dct[key]) for key in declared if key in dct)
        return cleaned
192
+
@@ -0,0 +1,252 @@
1
+ import io
2
+ import json
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ import shutil
7
+ import uuid
8
+ from attr import dataclass
9
+
10
+ from dogesec_commons.stixifier.summarizer import parse_summarizer_model
11
+
12
+ from ..objects import db_view_creator
13
+ from . import models
14
+ import tempfile
15
+ from file2txt.converter import get_parser_class
16
+ from txt2stix import get_include_path
17
+ from txt2stix.stix import txt2stixBundler
18
+ from txt2stix.ai_extractor import BaseAIExtractor
19
+ from stix2arango.stix2arango import Stix2Arango
20
+ from django.conf import settings
21
+ from txt2stix.ai_extractor.utils import DescribesIncident
22
+
23
+
24
+ from file2txt.converter import Fanger, get_parser_class
25
+ from file2txt.parsers.core import BaseParser
26
+ import txt2stix.utils
27
+ import txt2stix.txt2stix
28
+ import txt2stix.extractions
29
+
30
+
31
def all_extractors(names, _all=False):
    """Return the txt2stix extractors selected by ``names``, keyed by slug.

    Args:
        names: Container of extractor slugs to keep.
        _all: When true, ``names`` is ignored and every extractor is returned.

    Returns:
        Dict mapping each selected extractor's slug to the extractor object.
    """
    configured = txt2stix.extractions.parse_extraction_config(get_include_path())
    return {
        extractor.slug: extractor
        for extractor in configured.values()
        if _all or extractor.slug in names
    }
40
+
41
+
42
@dataclass
class ReportProperties:
    """Report-level values passed to ``txt2stixBundler`` when building a bundle.

    ``kwargs`` holds extra keyword arguments forwarded verbatim to the bundler.
    """

    name: str = None
    identity: dict = None
    tlp_level: str = None
    confidence: int = None
    labels: list[str] = None
    created: str = None
    # ``None`` stands in for "empty": a literal ``{}`` default would be a
    # single dict object shared by every instance created without ``kwargs``.
    kwargs: dict = None

    def __attrs_post_init__(self):
        """Give each instance its own empty ``kwargs`` dict when none was passed."""
        # NOTE: this hook is specific to attrs (``dataclass`` is imported from
        # ``attr`` in this module); the stdlib decorator would not call it.
        if self.kwargs is None:
            self.kwargs = {}
51
+
52
+
53
class StixifyProcessor:
    """Turn one input file into a STIX bundle and upload it to ArangoDB.

    The pipeline (see ``process``) is: file2txt conversion to markdown,
    txt2stix extraction, optional AI summary, bundle serialisation, upload
    via stix2arango. All intermediate files live in a private temporary
    directory that is removed when the processor is garbage collected.
    """

    def __init__(
        self,
        file: io.FileIO,
        profile: models.Profile,
        job_id: uuid.UUID,
        post=None,
        file2txt_mode="html",
        report_id=None,
        base_url=None,
        always_extract=False,
    ) -> None:
        """Copy ``file`` into a fresh temporary directory and record settings.

        Args:
            file: Binary file object; its ``name`` and full content are read.
            profile: Processing options.
            job_id: Identifier of the job, stored as a string.
            post: Accepted for compatibility; not used by this class.
            file2txt_mode: Mode passed to file2txt.
            report_id: Optional id for the generated report.
            base_url: Passed to the file2txt parser.
            always_extract: Forwarded to ``run_txt2stix``.
        """
        self.job_id = str(job_id)
        self.extra_data = dict()
        self.report_id = report_id
        self.profile = profile
        self.collection_name = "stixify"
        self.tmpdir = Path(tempfile.mkdtemp(prefix="stixify-"))
        self.file2txt_mode = file2txt_mode
        self.md_images = []
        self.processed_image_base_url = ""
        self.base_url = base_url
        self.incident: DescribesIncident = None
        self.summary = None

        # Only the base name is kept so the copy always lands inside tmpdir.
        self.filename = self.tmpdir / Path(file.name).name
        self.filename.write_bytes(file.read())

        self.task_name = f"{self.profile.name}/{job_id}/{self.report_id}"
        self.always_extract = always_extract

    def setup(self, /, report_prop: ReportProperties, extra=None):
        """Attach report properties and merge ``extra`` into the upload metadata.

        Args:
            report_prop: Values used when the bundler is created.
            extra: Optional mapping merged into ``self.extra_data``.
        """
        # ``None`` instead of a ``{}`` default avoids a shared mutable default.
        if extra:
            self.extra_data.update(extra)
        self.report_prop = report_prop

    def file2txt(self):
        """Convert the input file to markdown and collect its images.

        Sets ``output_md`` and ``md_file`` and appends one in-memory PNG per
        converter image to ``md_images``.
        """
        parser_class = get_parser_class(self.file2txt_mode, self.filename.name)
        converter: BaseParser = parser_class(
            self.filename,
            self.file2txt_mode,
            self.profile.extract_text_from_image,
            settings.GOOGLE_VISION_API_KEY,
            base_url=self.base_url,
        )
        output = converter.convert(
            processed_image_base_url=self.processed_image_base_url
        )
        if self.profile.defang:
            output = Fanger(output).defang()
        for name, img in converter.images.items():
            img_file = io.BytesIO()
            img_file.name = name
            img.save(img_file, format="png")
            self.md_images.append(img_file)

        self.output_md = output
        self.md_file = self.tmpdir / f"post_md_{self.report_id or 'file'}.md"
        self.md_file.write_text(self.output_md)

    def txt2stix(self):
        """Run txt2stix over the markdown and return the populated bundler.

        Also records the report id in ``extra_data`` and stores the content
        check result in ``self.incident``.
        """
        extractors = all_extractors(self.profile.extractions)
        # Group the selected extractors as {type: {slug: extractor}}.
        extractors_map = {}
        for extractor in extractors.values():
            extractors_map.setdefault(extractor.type, {})[extractor.slug] = extractor

        self.bundler = txt2stixBundler(
            self.report_prop.name,
            identity=self.report_prop.identity,
            tlp_level=self.report_prop.tlp_level,
            confidence=self.report_prop.confidence,
            labels=self.report_prop.labels,
            description=self.output_md,
            extractors=extractors,
            report_id=self.report_id,
            created=self.report_prop.created,
            **self.report_prop.kwargs,
        )
        self.extra_data["_stixify_report_id"] = str(self.bundler.report.id)
        input_text = txt2stix.utils.remove_links(
            self.output_md,
            self.profile.ignore_image_refs,
            self.profile.ignore_link_refs,
        )
        ai_extractors = [
            txt2stix.txt2stix.parse_model(model_str)
            for model_str in self.profile.ai_settings_extractions
        ]
        # ``x and parse_model(x)`` keeps unset providers as their falsy value.
        self.txt2stix_data = txt2stix.txt2stix.run_txt2stix(
            self.bundler,
            input_text,
            extractors_map,
            ai_content_check_provider=self.profile.ai_content_check_provider
            and txt2stix.txt2stix.parse_model(self.profile.ai_content_check_provider),
            ai_create_attack_flow=self.profile.ai_create_attack_flow,
            input_token_limit=settings.INPUT_TOKEN_LIMIT,
            ai_settings_extractions=ai_extractors,
            ai_settings_relationships=self.profile.ai_settings_relationships
            and txt2stix.txt2stix.parse_model(self.profile.ai_settings_relationships),
            relationship_mode=self.profile.relationship_mode,
            ignore_extraction_boundary=self.profile.ignore_extraction_boundary,
            always_extract=self.always_extract,
        )
        self.incident = self.txt2stix_data.content_check
        return self.bundler

    def summarize(self):
        """Add an AI generated summary note to the bundle, if configured.

        Best effort: any ordinary error is logged and swallowed so that a
        failing summary provider does not abort processing.

        Returns:
            The summary produced, or ``None`` if none was produced.
        """
        provider = self.profile.ai_summary_provider
        if not provider:
            return self.summary

        logging.info(
            f"summarizing report {self.report_id} using `{provider}`"
        )
        try:
            report = self.bundler.report
            summary_extractor = parse_summarizer_model(provider)
            self.summary = summary_extractor.summarize(self.output_md)
            summary_note_obj = {
                "type": "note",
                "spec_version": "2.1",
                # The note reuses the report's UUID under the ``note`` type.
                "id": report.id.replace("report", "note"),
                "created": report.created,
                "modified": report.modified,
                "created_by_ref": report.created_by_ref,
                "external_references": [
                    {
                        "source_name": "txt2stix_ai_summary_provider",
                        "external_id": provider,
                    },
                ],
                "abstract": f"AI Summary: {report.name}",
                "content": self.summary,
                "object_refs": [report.id],
                "object_marking_refs": report.object_marking_refs,
                "labels": report.labels,
                "confidence": report.confidence,
            }

            self.bundler.add_ref(summary_note_obj)
            self.bundler.add_ref(
                self.bundler.new_relationship(
                    summary_note_obj["id"],
                    report.id,
                    relationship_type="summary-of",
                    description=f"AI generated summary for {report.name}",
                    external_references=summary_note_obj["external_references"],
                )
            )
        except Exception as e:
            # ``Exception`` (not ``BaseException``) lets KeyboardInterrupt and
            # SystemExit propagate; the traceback goes to the log, not stdout.
            logging.exception(f"got err {e}")
        return self.summary

    def process(self) -> str:
        """Run the whole pipeline and return the id of the generated report."""
        logging.info(f"running file2txt on {self.task_name}")
        self.file2txt()
        logging.info(f"running txt2stix on {self.task_name}")
        bundler = self.txt2stix()
        self.summarize()
        self.write_bundle(bundler)
        logging.info(f"uploading {self.task_name} to arangodb via stix2arango")
        self.upload_to_arango()
        return bundler.report.id

    def write_bundle(self, bundler: txt2stixBundler):
        """Serialise the bundle as indented JSON into the temporary directory."""
        bundle = json.loads(bundler.to_json())
        self.bundle = json.dumps(bundle, indent=4)
        self.bundle_file = self.tmpdir / f"bundle_{self.report_id}.json"
        self.bundle_file.write_text(self.bundle)

    def upload_to_arango(self):
        """Upload the written bundle file with stix2arango.

        Both collections are linked to the configured database view before
        the upload is run.
        """
        s2a = Stix2Arango(
            file=str(self.bundle_file),
            database=settings.ARANGODB_DATABASE,
            collection=self.collection_name,
            stix2arango_note=f"stixifier-report--{self.report_id}",
            ignore_embedded_relationships=self.profile.ignore_embedded_relationships,
            ignore_embedded_relationships_smo=self.profile.ignore_embedded_relationships_smo,
            ignore_embedded_relationships_sro=self.profile.ignore_embedded_relationships_sro,
            host_url=settings.ARANGODB_HOST_URL,
            username=settings.ARANGODB_USERNAME,
            password=settings.ARANGODB_PASSWORD,
        )
        s2a.arangodb_extra_data.update(self.extra_data)
        for suffix in ("edge", "vertex"):
            db_view_creator.link_one_collection(
                s2a.arango.db,
                settings.ARANGODB_DATABASE_VIEW,
                f"{self.collection_name}_{suffix}_collection",
            )
        s2a.run()

    def __del__(self):
        """Remove the temporary directory; never raises."""
        # ``tmpdir`` is missing if ``__init__`` failed before creating it.
        tmpdir = getattr(self, "tmpdir", None)
        if tmpdir is not None:
            shutil.rmtree(tmpdir, ignore_errors=True)
@@ -0,0 +1,62 @@
1
+ from typing import Type
2
+ from llama_index.core import PromptTemplate
3
+ from txt2stix.ai_extractor import ALL_AI_EXTRACTORS, BaseAIExtractor
4
+ from llama_index.core.response_synthesizers import SimpleSummarize
5
+ from rest_framework.validators import ValidationError
6
+
7
# Raw text of the summarisation prompt. ``{context_str}`` is the template
# placeholder; presumably llama_index fills it with the text handed to the
# summarizer -- confirm against the llama_index version in use.
_SUMMARY_PROMPT_TEXT = """
<persona>

You are a cyber-security threat intelligence analyst responsible for analysing intelligence. You have a deep understanding of cybersecurity concepts and threat intelligence. You are responsible for simplifying long intelligence reports into concise summaries for other to quickly understand the contents.

</persona>

<requirement>

Using the MARKDOWN of the report provided in <document>, provide an executive summary of it containing no more than one paragraphs.

IMPORTANT: the output should be structured as markdown text.
IMPORTANT: do not put output in code block

</requirement>

<accuracy>

Think about your answer first before you respond.

</accuracy>

<document>
{context_str}
</document>

"""

prompt = PromptTemplate(_SUMMARY_PROMPT_TEXT)
34
+
35
def get_provider(klass: Type[BaseAIExtractor]):
    """Build a summarizer class derived from the AI extractor class ``klass``.

    The subclass is created with ``register=False`` and a provider name
    prefixed with ``~``, and gains a ``summarize`` method. Its ``__name__``
    is ``klass``'s name with ``Extractor`` replaced by ``Summarizer``.

    Returns:
        The new class (not an instance).
    """
    summarizer_name = klass.__name__.replace('Extractor', 'Summarizer')

    class SummarizerSession(klass, provider="~" + klass.provider, register=False):
        system_prompt = """
        You are a cyber-security threat intelligence analyst responsible for analysing intelligence.
        You have a deep understanding of cybersecurity concepts and threat intelligence.
        You are responsible for simplifying long intelligence reports into concise summaries for other to quickly understand the contents.
        """

        def summarize(self, text):
            """Summarise ``text`` in one pass using the module level prompt."""
            engine = SimpleSummarize(llm=self.llm, text_qa_template=prompt)
            return engine.get_response('', text_chunks=[text])

    SummarizerSession.__name__ = summarizer_name
    return SummarizerSession
47
+
48
+
49
def parse_summarizer_model(value: str):
    """Instantiate a summarizer from a ``provider[:model]`` string.

    Args:
        value: Provider name, optionally followed by ``:`` and a model name.

    Returns:
        An instance of the summarizer class built by ``get_provider``.

    Raises:
        ValidationError: If the provider is not in ``ALL_AI_EXTRACTORS`` or
            the summarizer cannot be constructed (original error chained).
    """
    try:
        provider_name, separator, model_name = value.partition(':')
        if provider_name not in ALL_AI_EXTRACTORS:
            raise ValidationError(f"invalid summary provider in `{value}`, must be one of [{list(ALL_AI_EXTRACTORS)}]")
        summarizer_cls = get_provider(ALL_AI_EXTRACTORS[provider_name])
        if separator:
            # A model part was supplied, even if it is empty.
            return summarizer_cls(model=model_name)
        return summarizer_cls()
    except ValidationError:
        raise
    except Exception as e:
        # ``Exception`` rather than ``BaseException``: KeyboardInterrupt and
        # SystemExit must not be turned into a validation error.
        raise ValidationError(f'invalid model: {value}') from e