cldk 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. cldk/__init__.py +3 -0
  2. cldk/analysis/__init__.py +8 -0
  3. cldk/analysis/analysis_level.py +9 -0
  4. cldk/analysis/c/__init__.py +0 -0
  5. cldk/analysis/c/treesitter/__init__.py +3 -0
  6. cldk/analysis/c/treesitter/c_sitter.py +510 -0
  7. cldk/analysis/call_graph.py +69 -0
  8. cldk/analysis/go/__init__.py +0 -0
  9. cldk/analysis/go/treesitter/__init__.py +3 -0
  10. cldk/analysis/go/treesitter/go_sitter.py +451 -0
  11. cldk/analysis/java/__init__.py +3 -0
  12. cldk/analysis/java/codeanalyzer/__init__.py +9 -0
  13. cldk/analysis/java/codeanalyzer/bin/.gitignore +1 -0
  14. cldk/analysis/java/codeanalyzer/bin/__init__.py +0 -0
  15. cldk/analysis/java/codeanalyzer/codeanalyzer.py +927 -0
  16. cldk/analysis/java/codeanalyzer/jar/.gitignore +1 -0
  17. cldk/analysis/java/codeanalyzer/jar/__init__.py +0 -0
  18. cldk/analysis/java/codeanalyzer/jar/codeanalyzer.jar +0 -0
  19. cldk/analysis/java/codeql/__init__.py +3 -0
  20. cldk/analysis/java/codeql/backend.py +148 -0
  21. cldk/analysis/java/codeql/codeql.py +238 -0
  22. cldk/analysis/java/java.py +597 -0
  23. cldk/analysis/java/treesitter/__init__.py +2 -0
  24. cldk/analysis/java/treesitter/javasitter.py +444 -0
  25. cldk/analysis/javascript/__init__.py +0 -0
  26. cldk/analysis/javascript/treesitter/__init__.py +3 -0
  27. cldk/analysis/javascript/treesitter/javascript_sitter.py +457 -0
  28. cldk/analysis/program_dependence_graph.py +6 -0
  29. cldk/analysis/python/__init__.py +3 -0
  30. cldk/analysis/python/python.py +122 -0
  31. cldk/analysis/python/treesitter/__init__.py +3 -0
  32. cldk/analysis/python/treesitter/python_sitter.py +352 -0
  33. cldk/analysis/symbol_table.py +84 -0
  34. cldk/analysis/system_dependence_graph.py +6 -0
  35. cldk/core.py +149 -0
  36. cldk/models/__init__.py +0 -0
  37. cldk/models/c/__init__.py +0 -0
  38. cldk/models/c/models.py +111 -0
  39. cldk/models/go/__init__.py +0 -0
  40. cldk/models/go/models.py +101 -0
  41. cldk/models/java/__init__.py +18 -0
  42. cldk/models/java/constants_namespace.py +16 -0
  43. cldk/models/java/models.py +368 -0
  44. cldk/models/javascript/__init__.py +0 -0
  45. cldk/models/javascript/models.py +95 -0
  46. cldk/models/python/__init__.py +0 -0
  47. cldk/models/python/models.py +65 -0
  48. cldk/models/treesitter/__init__.py +3 -0
  49. cldk/models/treesitter/models.py +52 -0
  50. cldk/utils/__init__.py +0 -0
  51. cldk/utils/analysis_engine.py +4 -0
  52. cldk/utils/exceptions/__init__.py +13 -0
  53. cldk/utils/exceptions/exceptions.py +40 -0
  54. cldk/utils/logging.py +0 -0
  55. cldk/utils/sanitization/__init__.py +0 -0
  56. cldk/utils/sanitization/java/TreesitterSanitizer.py +323 -0
  57. cldk/utils/sanitization/java/TreesitterUtils.py +505 -0
  58. cldk/utils/sanitization/java/__init__.py +1 -0
  59. cldk/utils/treesitter/__init__.py +0 -0
  60. cldk/utils/treesitter/tree_sitter_utils.py +48 -0
  61. cldk-0.1.1.dist-info/LICENSE +201 -0
  62. cldk-0.1.1.dist-info/METADATA +306 -0
  63. cldk-0.1.1.dist-info/RECORD +64 -0
  64. cldk-0.1.1.dist-info/WHEEL +4 -0
cldk/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .core import CLDK
2
+
3
+ __all__ = ["CLDK"]
@@ -0,0 +1,8 @@
1
+
2
+ from .call_graph import CallGraph
3
+ from .program_dependence_graph import ProgramDependenceGraph
4
+ from .system_dependence_graph import SystemDependenceGraph
5
+ from .symbol_table import SymbolTable
6
+ from .analysis_level import AnalysisLevel
7
+
8
+ __all__ = ["CallGraph", "ProgramDependenceGraph", "SystemDependenceGraph", "SymbolTable", "AnalysisLevel"]
@@ -0,0 +1,9 @@
1
+ from enum import Enum
2
+
3
+
4
class AnalysisLevel(str, Enum):
    """
    Enumeration of the analysis levels.

    Every member is also a ``str`` (the class mixes in ``str``), so a member
    compares equal to its hyphenated value, e.g.
    ``AnalysisLevel.call_graph == "call-graph"``.
    """

    # Member names use underscores; the associated values use hyphens.
    symbol_table = "symbol-table"
    call_graph = "call-graph"
    program_dependency_graph = "program-dependency-graph"
    system_dependency_graph = "system-dependency-graph"
File without changes
@@ -0,0 +1,3 @@
1
+ from cldk.analysis.c.treesitter.c_sitter import CSitter
2
+
3
+ __all__ = ["CSitter"]
@@ -0,0 +1,510 @@
1
+ from typing import List
2
+ from tree_sitter import Language, Parser, Query, Node
3
+ import tree_sitter_c as tsc
4
+
5
+ from cldk.models.c.models import CFunction, CImport, CParameter, CTranslationUnit, COutput
6
+ from cldk.models.treesitter import Captures
7
+
8
+
9
class CSitter:
    """
    Tree sitter for C use cases.

    Holds a tree-sitter ``Language``/``Parser`` pair built from the
    ``tree_sitter_c`` grammar and uses tree-sitter queries to extract function
    definitions and ``#include`` directives from C source text.
    """

    def __init__(self) -> None:
        self.language: Language = Language(tsc.language())
        self.parser: Parser = Parser(self.language)

    def get_all_functions(self, code: str) -> List[CFunction]:
        """
        Get all the functions in the provided code.

        Parameters
        ----------
        code : str
            The code you want to analyse.

        Returns
        -------
        List[CFunction]
            The details of every function definition found in the code.
        """

        return [self.__get_function_details(code, capture.node) for capture in self.__get_function_nodes(code)]

    def get_imports(self, code: str) -> List[CImport]:
        """
        Get all the imports (``#include`` directives) in the provided code.

        Parameters
        ----------
        code : str
            The code you want to analyse.

        Returns
        -------
        List[CImport]
            One entry per include directive that has a path. For
            ``<header.h>`` and ``"header.h"`` forms the surrounding delimiters
            are stripped; any other path form is kept verbatim and reported as
            a non-system import.
        """

        query = """(preproc_include) @import"""
        captures: Captures = self.__frame_query_and_capture_output(query, code)
        imports: List[CImport] = []
        for capture in captures:
            path_node: Node = capture.node.child_by_field_name("path")
            # A malformed include may have no path child; skip it rather than
            # failing on ``None.text``.
            if path_node is None:
                continue

            text: str = path_node.text.decode()
            if path_node.type == "system_lib_string":
                imports.append(CImport(value=text[1:-1], is_system=True))
            elif path_node.type == "string_literal":
                imports.append(CImport(value=text[1:-1], is_system=False))
            else:
                imports.append(CImport(value=text, is_system=False))

        return imports

    def get_translation_unit_details(self, code: str) -> CTranslationUnit:
        """
        Given the code of a C translation unit, return the details.

        Parameters
        ----------
        code : str
            The source code of the translation unit.

        Returns
        -------
        CTranslationUnit
            The functions and imports of the given translation unit.
        """

        return CTranslationUnit(
            functions=self.get_all_functions(code),
            imports=self.get_imports(code),
        )

    def __get_function_details(self, original_code: str, node: Node) -> CFunction:
        """
        Extract the details of a function from a tree-sitter node.

        Parameters
        ----------
        original_code : str
            The original code from which the tree-sitter node was parsed.
        node : Node
            The function tree-sitter node we want to evaluate.

        Returns
        -------
        CFunction
            The extracted details of the function.
        """

        nb_pointers = self.__count_pointers(node.child_by_field_name("declarator"))
        return_type: str = self.__get_function_return_type(node)
        # "function" is the marker for a function-pointer return; pointer
        # stars are only appended to real type names.
        if return_type != "function":
            return_type = return_type + nb_pointers * "*"

        output: COutput = COutput(
            type=return_type,
            is_reference=return_type == "function" or nb_pointers > 0,
            qualifiers=self.__get_type_qualifiers(node),
        )

        return CFunction(
            name=self.__get_function_name(node),
            code=node.text.decode(),
            # Row components of the tree-sitter start/end points.
            start_line=node.start_point[0],
            end_line=node.end_point[0],
            signature=self.__get_function_signature(original_code, node),
            parameters=self.__get_function_parameters(node),
            output=output,
            comment=self.__get_comment(node),
            specifiers=self.__get_storage_class_specifiers(node),
        )

    def __get_function_parameters(self, function_node: Node) -> List[CParameter]:
        """
        Extract the parameters of a tree-sitter function node.

        Parameters
        ----------
        function_node : Node
            The function node whose parameters we want to extract.

        Returns
        -------
        List[CParameter]
            The parameters of the given function node. Parameters for which
            no type could be determined are dropped.
        """

        query = """(function_declarator ((parameter_list) @function.parameters))"""
        parameters_list: Captures = self.__query_node_and_capture_output(query, function_node)

        if not parameters_list:
            return []

        params: dict[str, CParameter] = self.__get_parameter_details(parameters_list)

        # for old-style function definition:
        # https://www.gnu.org/software/c-intro-and-ref/manual/html_node/Old_002dStyle-Function-Definitions.html
        # The declarations between the parameter list and the body carry the
        # types, so they overwrite the untyped entries collected above.
        for child in function_node.children:
            if child.type == "declaration":
                for name, parameter in self.__extract_parameter_declarations(child):
                    params[name] = parameter

        # filter out params without type
        return [parameter for parameter in params.values() if parameter.type]

    def __frame_query_and_capture_output(self, query: str, code_to_process: str) -> Captures:
        """Frame a query for the tree-sitter parser and run it on the parsed code.

        Parameters
        ----------
        query : str
            The query to frame.
        code_to_process : str
            The code to process.

        Returns
        -------
        Captures
            The list of tree-sitter captures.
        """

        framed_query: Query = self.language.query(query)
        tree = self.parser.parse(bytes(code_to_process, "utf-8"))
        return Captures(framed_query.captures(tree.root_node))

    def __query_node_and_capture_output(self, query: str, node: Node) -> Captures:
        """Frame a query for the tree-sitter parser and query the given tree-sitter node.

        Parameters
        ----------
        query : str
            The query to frame.
        node : Node
            The root node used for querying.

        Returns
        -------
        Captures
            The list of tree-sitter captures.
        """

        framed_query: Query = self.language.query(query)
        return Captures(framed_query.captures(node))

    def __get_function_nodes(self, code: str) -> Captures:
        """Parse the given code and extract tree-sitter function nodes.

        Parameters
        ----------
        code : str
            The input code.

        Returns
        -------
        Captures
            The list of tree-sitter captures, one per function definition.
        """

        query = """((function_definition) @function)"""
        return self.__frame_query_and_capture_output(query, code)

    def __get_function_name(self, function_node: Node) -> str:
        """
        Extract the function name from a tree-sitter function node.

        Parameters
        ----------
        function_node : Node
            The function node whose name we want to extract.

        Returns
        -------
        str
            The name of the function.
        """

        query = """(function_declarator ((identifier) @function.name))"""
        # The first capture is used as the function's name.
        # NOTE(review): this indexes element 0 unconditionally, so a
        # definition for which the query captures nothing would fail here -
        # confirm whether that can occur for valid input.
        function_name_node: Node = self.__query_node_and_capture_output(query, function_node)[0].node
        return function_name_node.text.decode()

    def __get_type_text(self, type_node: Node) -> str:
        """
        Return the textual name of a type node.

        Parameters
        ----------
        type_node : Node
            The node found in the ``type`` field of a definition or declaration.

        Returns
        -------
        str
            For a named ``struct_specifier``, the struct name only; for every
            other node (including an anonymous struct, which has no ``name``
            child) the full text of the type node.
        """

        if type_node.type == "struct_specifier":
            name_node = type_node.child_by_field_name("name")
            # Anonymous structs have no name child; fall through to the full
            # text instead of dereferencing None.
            if name_node is not None:
                return name_node.text.decode()

        return type_node.text.decode()

    def __get_function_return_type(self, function_node: Node) -> str:
        """
        Extracts the return type of a tree-sitter function node.

        Parameters
        ----------
        function_node : Node
            The function node whose return type we want to extract.

        Returns
        -------
        str
            The return type of the function, or the literal ``"function"`` if
            the return is considered a function pointer.
        """

        # TODO: not sure if this is correct
        # if there's more than 1 function declarator, we consider it a function pointer
        if self.__count_function_declarations(function_node.child_by_field_name("declarator")) > 1:
            return "function"

        return self.__get_type_text(function_node.child_by_field_name("type"))

    def __get_function_signature(self, code: str, function_node: Node) -> str:
        """
        Extracts the function signature from a tree-sitter function node.

        Parameters
        ----------
        code : str
            The original code that was used to extract the function node.
        function_node : Node
            The function node whose signature we want to extract.

        Returns
        -------
        str
            The signature of the function: everything from the start of the
            definition up to (not including) its body, stripped of
            surrounding whitespace.
        """

        body_node: Node = function_node.child_by_field_name("body")
        # Offsets are byte offsets, so slice the UTF-8 encoding, not the str.
        code_bytes = bytes(code, "utf-8")
        signature = code_bytes[function_node.start_byte : body_node.start_byte]

        return signature.decode().strip()

    def __get_type_qualifiers(self, node: Node) -> List[str]:
        """
        Extract the type qualifiers from a given tree-sitter node.

        Parameters
        ----------
        node : Node
            The node whose type qualifiers we want to extract.

        Returns
        -------
        List[str]
            The text of each direct child of type ``type_qualifier``.
        """

        if not node or not node.children:
            return []

        return [child.text.decode() for child in node.children if child.type == "type_qualifier"]

    def __get_storage_class_specifiers(self, node: Node) -> List[str]:
        """
        Extract the storage class specifiers from a given tree-sitter node.

        Parameters
        ----------
        node : Node
            The node whose storage class specifiers we want to extract.

        Returns
        -------
        List[str]
            The text of each direct child of type ``storage_class_specifier``.
        """

        if not node or not node.children:
            return []

        return [child.text.decode() for child in node.children if child.type == "storage_class_specifier"]

    def __count_pointers(self, node: Node) -> int:
        """
        Count the number of consecutive pointers for a tree-sitter node.

        Parameters
        ----------
        node : Node
            The tree-sitter node we want to evaluate.

        Returns
        -------
        int
            The number of consecutive ``pointer_declarator`` nodes, starting
            at the given node and following the ``declarator`` field.
        """

        count = 0
        curr_node = node
        while curr_node and curr_node.type == "pointer_declarator":
            count += 1
            curr_node = curr_node.child_by_field_name("declarator")

        return count

    def __count_function_declarations(self, node: Node) -> int:
        """
        Counts the number of function declarator nodes for a tree-sitter node.

        Parameters
        ----------
        node : Node
            The tree-sitter node we want to evaluate.

        Returns
        -------
        int
            The number of ``function_declarator`` nodes in the subtree rooted
            at the given node. Nodes without children contribute 0.
        """

        if not node or not node.children:
            return 0

        own_count = 1 if node.type == "function_declarator" else 0
        return own_count + sum(self.__count_function_declarations(child) for child in node.children)

    def __get_parameter_details(self, parameters_list: Captures) -> dict[str, CParameter]:
        """
        Extract parameter details from a list of tree-sitter parameters.

        Parameters
        ----------
        parameters_list : Captures
            The parameter list node captures.

        Returns
        -------
        Dict[str, CParameter]
            A dictionary of parameter details keyed by parameter name. A later
            parameter with the same name replaces an earlier one.
        """

        params: dict[str, CParameter] = {}

        for parameters in parameters_list:
            if not parameters or not parameters.node.children:
                continue
            for param in parameters.node.children:
                # old c style
                if param.type == "identifier":
                    name, parameter = self.__extract_simple_parameter(param, "")
                    params[name] = parameter
                elif param.type == "variadic_parameter":
                    name, parameter = self.__extract_simple_parameter(param, "variadic")
                    params[name] = parameter
                elif param.type == "parameter_declaration":
                    for name, parameter in self.__extract_parameter_declarations(param):
                        params[name] = parameter

        return params

    def __extract_simple_parameter(self, node: Node, parameter_type: str) -> tuple[str, CParameter]:
        """
        Build a parameter whose name is the node text and whose type is given.

        Parameters
        ----------
        node : Node
            The node whose text is used as the parameter name.
        parameter_type : str
            The type to record for the parameter (may be empty).

        Returns
        -------
        tuple[str, CParameter]
            The parameter name and a non-reference parameter without
            qualifiers or specifiers.
        """

        name: str = node.text.decode()
        parameter: CParameter = CParameter(
            type=parameter_type,
            qualifiers=[],
            specifiers=[],
            is_reference=False,
            name=name,
        )

        return (name, parameter)

    def __extract_parameter_declarations(self, node: Node) -> List[tuple[str, CParameter]]:
        """
        Extract one parameter per identifier found under a declaration node.

        Parameters
        ----------
        node : Node
            A parameter declaration (or old-style declaration) node.

        Returns
        -------
        List[tuple[str, CParameter]]
            ``(name, parameter)`` pairs; empty when no identifier is found.
        """

        query = """((identifier) @name)"""
        captures: Captures = self.__query_node_and_capture_output(query, node)

        # NOTE(review): the query is not restricted to declarator names, so
        # other identifiers under the node (e.g. in an array size expression)
        # would presumably be reported as parameters too - confirm.
        return [self.__extract_parameter_declaration(node, capture.node) for capture in captures]

    def __extract_parameter_declaration(self, parent_node: Node, identifier_node: Node) -> tuple[str, CParameter]:
        """
        Build the parameter described by an identifier inside a declaration.

        Parameters
        ----------
        parent_node : Node
            The declaration node that contains the identifier.
        identifier_node : Node
            The identifier node naming the parameter.

        Returns
        -------
        tuple[str, CParameter]
            The parameter name and its details.
        """

        name = identifier_node.text.decode()

        nb_function_declarations = self.__count_function_declarations(parent_node)
        # we have a function pointer
        if nb_function_declarations > 0:
            parameter: CParameter = CParameter(
                type="function",
                qualifiers=[],  # TODO: not sure if this is correct
                specifiers=[],  # TODO: not sure if this is correct
                is_reference=True,
                name=name,
            )
            return (name, parameter)

        param_type: str = self.__get_type_text(parent_node.child_by_field_name("type"))
        type_augmentor = self.__augment_type(identifier_node, parent_node.type)

        parameter = CParameter(
            type=param_type + type_augmentor,
            qualifiers=self.__get_type_qualifiers(parent_node),
            specifiers=self.__get_storage_class_specifiers(parent_node),
            is_reference=type_augmentor.startswith("*"),
            name=name,
        )

        return (name, parameter)

    def __augment_type(self, identifier_node: Node, stop_node_type: str) -> str:
        """
        Augment types with pointer and array details.

        Walks from the identifier's parent up through its ancestors until a
        node of type ``stop_node_type`` (or the root) is reached, collecting a
        ``*`` per pointer declarator and a ``[size]`` per array declarator.
        A parenthesized declarator wraps what has been collected so far in
        parentheses.

        Parameters
        ----------
        identifier_node : Node
            The identifier node of the parameter.
        stop_node_type : str
            The node type at which the upward walk stops.

        Returns
        -------
        str
            The suffix to append to the base type name.
        """

        # not sure about this one
        type_augmentor = ""
        pointer_augmentor = ""
        array_augmentor = ""
        curr_node = identifier_node.parent
        while curr_node and curr_node.type != stop_node_type:
            if curr_node.type == "pointer_declarator":
                pointer_augmentor = f"*{pointer_augmentor}"
            elif curr_node.type == "array_declarator":
                size_node = curr_node.child_by_field_name("size")
                size: str = ""
                if size_node:
                    size = size_node.text.decode()
                array_augmentor = f"{array_augmentor}[{size}]"
            elif curr_node.type == "parenthesized_declarator":
                type_augmentor = f"({pointer_augmentor}{type_augmentor}{array_augmentor})"
                pointer_augmentor = ""
                array_augmentor = ""

            curr_node = curr_node.parent

        return f"{pointer_augmentor}{type_augmentor}{array_augmentor}"

    def __get_comment(self, node: Node) -> str:
        """
        Extract the comment associated with a tree-sitter node.

        Parameters
        ----------
        node : Node
            The tree-sitter node whose preceding comments we want to collect.

        Returns
        -------
        str
            The comment associated with the given node: the text of the
            unbroken run of ``comment`` siblings immediately preceding it, in
            source order, joined by newlines. Empty if there is none.
        """

        docs = []
        curr_node = node
        # Walk backwards over the comment siblings; they are collected
        # nearest-first and reversed below to restore source order.
        while curr_node.prev_named_sibling and curr_node.prev_named_sibling.type == "comment":
            curr_node = curr_node.prev_named_sibling
            docs.append(curr_node.text.decode())

        return "\n".join(reversed(docs))
@@ -0,0 +1,69 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class CallGraph(ABC):
    """
    Abstract interface for call-graph analyses.

    Every query method is abstract and accepts keyword arguments only; a
    concrete subclass must override all of them before it can be instantiated.
    """

    def __init__(self) -> None:
        super().__init__()

    @abstractmethod
    def get_callees(self, **kwargs):
        """Return all the callees for the given source code."""

    @abstractmethod
    def get_callers(self, **kwargs):
        """Return all the callers for the given source code."""

    @abstractmethod
    def get_call_graph(self, **kwargs):
        """Return the call graph of the given application."""

    @abstractmethod
    def get_call_graph_json(self, **kwargs):
        """Return the call graph of the given application in JSON format."""

    @abstractmethod
    def get_class_call_graph(self, **kwargs):
        """Return the call graph for the given application and class."""

    @abstractmethod
    def get_entry_point_classes(self, **kwargs):
        """Return all the entry point classes of the given application."""

    @abstractmethod
    def get_entry_point_methods(self, **kwargs):
        """Return all the entry point methods of the given application."""

    @abstractmethod
    def get_service_entry_point_classes(self, **kwargs):
        """Return all the service entry point classes of the given application."""

    @abstractmethod
    def get_service_entry_point_methods(self, **kwargs):
        """Return all the service entry point methods of the given application."""
File without changes
@@ -0,0 +1,3 @@
1
+ from cldk.analysis.go.treesitter.go_sitter import GoSitter
2
+
3
+ __all__ = ["GoSitter"]